# -*- coding: utf-8 -*-
# @Time : 2021/6/2 9:23
# @Author : PHC
# @File cjcyw_goods.py
# @Software : PyCharm
"""Scrape cargo listings from t.cjcyw.com and export them to an .xls workbook.

For every record on the requested list pages the script fetches the matching
detail record, flattens both into one row, writes all rows to a workbook named
after today's date, and moves that workbook into a fixed archive directory.
"""
import json
import requests  # not used by the code in this file; kept so the import surface is unchanged
import urllib
import urllib.request  # `import urllib` alone does not guarantee the submodule is loaded
import xlwt
import time
import os
import shutil

# Seconds to wait on a single HTTP request before giving up.
_REQUEST_TIMEOUT = 30

_LIST_URL = "https://t.cjcyw.com/cjcy/goods/list?_index={}"
_DETAIL_URL = "https://t.cjcyw.com/cjcy/goods/detail?aid={}"

# Constant written into the last column of every row to identify the source.
_SOURCE_LABEL = "长江船运网-货源"

# Header row of the workbook; the order must match the rows built by _build_row.
_COLUMNS = ("装货日期", "起始地", "到达地", "货物类型", "货物重量", "名字", "联系电话", "备注", "来源")

# Directory the finished workbook is moved into.
_OUTPUT_DIR = r'D:\长江船运网爬取文档\货源'


# Every request targets a different address, so the address is passed in.
def messageAll(url):
    """Fetch *url* with the fixed request headers and return the parsed JSON.

    Args:
        url: Address to request.

    Returns:
        The response body decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        ValueError: If the body is not valid JSON.
    """
    # NOTE(review): the session cookie is hard-coded; presumably it expires
    # and has to be refreshed by hand -- confirm.
    head = {
        "Cookie": "cjcy_session_id=a7ff8f24-fcb2-43d3-80e4-53a7d466cea9",
        "User-Agent": "cicy/5.5 (iPhone; iOS 12.0; Scale/3.00)",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "t.cjcyw.com"
    }
    request = urllib.request.Request(url, headers=head)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        pages = response.read().decode("utf-8")
    return json.loads(pages)


def _build_row(item):
    """Turn one list record into a workbook row, fetching its detail record."""
    # assumes 'time1' is a Unix timestamp in seconds -- TODO confirm against the API
    load_date = time.strftime("%Y-%m-%d", time.localtime(item['time1']))
    origin = item['dangqian']        # departure port
    destination = item['mudidi']     # destination port
    cargo = item['title']            # cargo description
    aid = item['aid']
    tonnage = item['dunwei']         # cargo weight

    # Contact information is only available on the per-record detail endpoint.
    detail = messageAll(_DETAIL_URL.format(aid))['data']
    return [
        load_date,
        origin,
        destination,
        cargo,
        tonnage,
        detail['publishLinkMan'],    # contact name
        detail['publishTel'],        # contact phone
        detail['beizhu11'],          # remarks
        _SOURCE_LABEL,               # source, always the last column
    ]


def _collect_rows(page_count):
    """Return the rows for list pages 1..page_count, printing progress per row."""
    rows = []
    # range() excludes its upper bound, hence the +1 to include the last page.
    for page in range(1, page_count + 1):
        records = messageAll(_LIST_URL.format(page))['data']['records']
        for item in records:
            row = _build_row(item)
            rows.append(row)
            print("第", page, "页,第", len(rows), "条", row)
    return rows


def _write_workbook(rows, path):
    """Write the header row followed by *rows* to an .xls workbook at *path*."""
    table = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = table.add_sheet("货源信息-长江船运网", cell_overwrite_ok=True)
    for col_idx, title in enumerate(_COLUMNS):
        sheet.write(0, col_idx, title)
    # Data starts on sheet row 1; row 0 holds the header.
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)
    table.save(path)


def main(pageNum=2):
    """Scrape *pageNum* list pages and archive the result as a dated workbook.

    Args:
        pageNum: Number of list pages to fetch, starting from page 1.

    The workbook is first saved in the current working directory and then
    moved into the archive directory. A workbook of the same name that is
    already archived (a second run on the same day) is replaced.
    """
    datalist = _collect_rows(pageNum)

    # time.localtime() is the system's local time, not GMT.
    timeNow = time.strftime("%Y-%m-%d", time.localtime())
    # File name format: current date (year-month-day) + fixed name.
    savePath = "%s长江船运网-----货源.xls" % (timeNow)
    _write_workbook(datalist, savePath)  # saved into the current directory

    file_path = os.path.join(os.getcwd(), savePath)
    # Without the directory, shutil.move would rename the workbook to a file
    # called like the directory instead of placing it inside.
    os.makedirs(_OUTPUT_DIR, exist_ok=True)
    target_path = os.path.join(_OUTPUT_DIR, savePath)
    # shutil.move refuses to overwrite inside an existing directory; remove the
    # earlier same-day workbook so a re-run replaces it.
    if os.path.exists(target_path):
        os.remove(target_path)
    shutil.move(file_path, target_path)


if __name__ == '__main__':
    main()
    print("打印完毕")