# -*- coding: utf-8 -*-
# @Time : 2021/6/2 9:23
# @Author : PHC
# @File : cjcyw_goods.py
# @Software : PyCharm
import json
import requests
import urllib
import xlwt
import time
import os
import shutil
def messageAll(url, timeout=30):
    """Request *url* with the fixed site headers and return the parsed JSON body.

    Every request targets a different address, so the address is passed in
    as an argument.

    Args:
        url: Address to request.
        timeout: Seconds to wait on a blocking network operation before
            giving up; forwarded to ``urllib.request.urlopen``.

    Returns:
        The response body decoded as UTF-8 and parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If the body is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            subclasses of ``ValueError``).
    """
    # A plain ``import urllib`` does not load the ``request`` submodule, so
    # import it explicitly instead of relying on another package having
    # imported it as a side effect.
    import urllib.request

    head = {
        "Cookie": "cjcy_session_id=a7ff8f24-fcb2-43d3-80e4-53a7d466cea9",
        "User-Agent": "cicy/5.5 (iPhone; iOS 12.0; Scale/3.00)",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "t.cjcyw.com",
    }
    request = urllib.request.Request(url, headers=head)
    # The context manager closes the response even if reading/decoding fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        pages = response.read().decode("utf-8")
    return json.loads(pages)
def _collect_rows(pageNum):
    """Fetch listing pages 1..pageNum and build one spreadsheet row per record."""
    rows = []
    # range() excludes its upper bound, hence pageNum + 1 to cover pages 1..pageNum.
    for num in range(1, pageNum + 1):
        listing = messageAll("https://t.cjcyw.com/cjcy/goods/list?_index={}".format(num))
        for record in listing['data']['records']:
            row = [
                # 'time1' is handed to time.localtime(), i.e. treated as seconds since the epoch.
                time.strftime("%Y-%m-%d", time.localtime(record['time1'])),  # loading date
                record['dangqian'],  # port of departure
                record['mudidi'],    # port of destination
                record['title'],     # cargo
                record['dunwei'],    # cargo weight
            ]
            # The remaining columns come from the per-record detail endpoint.
            detail = messageAll(
                "https://t.cjcyw.com/cjcy/goods/detail?aid={}".format(record['aid'])
            )['data']
            row.append(detail['publishLinkMan'])  # contact name
            row.append(detail['publishTel'])      # contact phone
            row.append(detail['beizhu11'])        # remarks
            row.append("长江船运网-货源")           # source, always the last column
            rows.append(row)
            print("第", num, "页,第", len(rows), "条", row)
    return rows


def _write_workbook(rows, savePath):
    """Write the header row followed by *rows* into an .xls workbook at *savePath*."""
    table = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = table.add_sheet("货源信息-长江船运网", cell_overwrite_ok=True)
    col = ("装货日期", "起始地", "到达地", "货物类型", "货物重量", "名字", "联系电话", "备注", "来源")
    for j, title in enumerate(col):
        sheet.write(0, j, title)
    # Data starts on sheet row 1; row 0 holds the column titles.
    for i, row in enumerate(rows, start=1):
        for j, value in enumerate(row):
            sheet.write(i, j, value)
    table.save(savePath)


def main(pageNum=2):
    """Scrape the cargo listings, save them to a dated .xls file and archive it.

    The workbook is first written to the current working directory under a
    name made of today's local date plus a fixed suffix, then moved into the
    archive directory.

    Args:
        pageNum: Number of listing pages to fetch, starting from page 1.

    Raises:
        shutil.Error: If a file with the same name already exists in the
            archive directory (e.g. a second run on the same day); rename or
            remove the earlier file first.
    """
    datalist = _collect_rows(pageNum)

    # time.localtime() with no argument is the current time in the local zone.
    timeNow = time.strftime("%Y-%m-%d", time.localtime())
    # File name format: current date (year-month-day) followed by a fixed name.
    savePath = "%s长江船运网-----货源.xls" % (timeNow)
    _write_workbook(datalist, savePath)  # saved relative to the current directory

    file_path = os.path.join(os.getcwd(), savePath)
    new_path = r'D:\长江船运网爬取文档\货源'
    # Without an existing directory, shutil.move would rename the workbook to
    # a file called "货源" (or fail) instead of placing it inside the folder.
    os.makedirs(new_path, exist_ok=True)
    shutil.move(file_path, new_path)
# Script entry point: run the scrape only when this file is executed directly,
# then report completion on stdout.
if __name__ == "__main__":
    main()
    print("打印完毕")