# -*- coding: utf-8 -*-
    2. # @Time : 2021/6/2 9:23
    3. # @Author : PHC
# @File : cjcyw_goods.py
    5. # @Software : PyCharm
    6. import json
    7. import requests
    8. import urllib
    9. import xlwt
    10. import time
    11. import os
    12. import shutil
# Every request targets a different address, so the URL is passed in as a parameter.
    14. def messageAll(url):
    15. head={
    16. "Cookie":"cjcy_session_id=a7ff8f24-fcb2-43d3-80e4-53a7d466cea9",
    17. "User-Agent":"cicy/5.5 (iPhone; iOS 12.0; Scale/3.00)",
    18. "Content-Type": "application/x-www-form-urlencoded",
    19. "Host": "t.cjcyw.com"
    20. }
    21. request=urllib.request.Request(url,headers=head)
    22. response=urllib.request.urlopen(request)
    23. pages=response.read().decode("utf-8")
    24. contents=json.loads(pages)
    25. return contents
    26. def main():
    27. datalist = []
    28. pageNum=2 #查找的页数
    29. for num in range(1,pageNum+1): #左闭右开,打印1 ,2 ,3
    30. urlOne="https://t.cjcyw.com/cjcy/goods/list?_index={}".format(num) #所需要查找的页数,循环
    31. contents=messageAll(urlOne)
    32. content=contents['data']['records']
    33. for i in content:
    34. data=[]
    35. starDate = i['time1']
    36. loadTime = time.strftime("%Y-%m-%d", time.localtime(starDate))
    37. data.append(loadTime) # 装货日期
    38. starLoacation=i['dangqian']
    39. data.append(starLoacation) #始发港
    40. endLocation=i['mudidi']
    41. data.append(endLocation) #目的港
    42. cargo = i['title']
    43. data.append(cargo) # 货物
    44. aid=i['aid']
    45. dunwei=i['dunwei']
    46. data.append(dunwei) #货物重量
    47. #查看每条船的详情信息
    48. urlEnd="https://t.cjcyw.com/cjcy/goods/detail?aid={}".format(aid)
    49. contentEnd=messageAll(urlEnd)
    50. contentLast=contentEnd['data']
    51. name=contentLast['publishLinkMan']
    52. data.append(name) #名字
    53. phone=contentLast['publishTel']
    54. data.append(phone) #联系电话
    55. beizhu = contentLast['beizhu11']
    56. data.append(beizhu) #备注
    57. data.append("长江船运网-货源") #来源,加到最后
    58. datalist.append(data)
    59. print("第", num, "页,第", len(datalist), "条", data)
    60. timePass = time.localtime() # 输出系统的当前时间,当前时间为格林尼治时间? 秒数,
    61. timeNow = time.strftime("%Y-%m-%d", timePass) # 将系统的当前时间格式化输出,一天打印一次,若重复打印,最好将已输出的文件名字更改一下,避免数据丢失,重名会覆盖。
    62. savePath = "%s长江船运网-----货源.xls" % (timeNow) # 保存的文件命名格式:当前时间(年-月-日)+文件名
    63. table = xlwt.Workbook(encoding="utf-8", style_compression=0)
    64. sheet = table.add_sheet("货源信息-长江船运网", cell_overwrite_ok=True)
    65. col = ("装货日期", "起始地", "到达地", "货物类型", "货物重量", "名字", "联系电话", "备注","来源")
    66. for i in range(0, len(col)):
    67. sheet.write(0, i, col[i])
    68. for i in range(len(datalist)):
    69. oneShip=datalist[i]
    70. for j in range(0,len(col)):
    71. a=oneShip[j]
    72. sheet.write(i+1,j,a)
    73. table.save(savePath) #文件保存到当前目录
    74. nowDirectory=os.getcwd() #返回当前工作的目录
    75. file_path=os.path.join(nowDirectory,savePath) #返回当前目录中指定的文件。
    76. new_path=r'D:\长江船运网爬取文档\货源' #将文件保存到指定目录
    77. shutil.move(file_path,new_path)
    78. if __name__=='__main__':
    79. main()
    80. print("打印完毕")