# 获取京东的评论和搜索信息 (Fetch JD.com search results and product review information)
# -*- coding: utf-8 -*-
"""Search JD.com for a keyword and print the hot review tags of every hit.

The script prompts for a product keyword, collects the ``data-sku`` ids from
the JD search result page(s), then queries the JD comment endpoint once per
SKU and prints each entry of ``hotCommentTagStatistics`` as ``name[count]``.

Example comment request:
https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100007386276&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
"""
import json
import re

import requests
from lxml import etree

SEARCH_URL = "https://search.jd.com/Search"
COMMENT_URL = "https://club.jd.com/comment/productPageComments.action"

# Header names must not contain spaces; the original spelling
# "upgrade - insecure - requests" was not a valid HTTP field name.
SEARCH_HEADERS = {
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/69.0.3497.92 Safari/537.36"
    ),
}

# XPath selecting the SKU id attribute of every product in the result list.
SKU_XPATH = '//*[@id="J_goodsList"]/ul/li/@data-sku'

# Query parameters shared by every comment request; 'productId' is replaced
# per SKU before the request is sent.
COMMENT_PARAMS = {
    'callback': 'fetchJSON_comment98',
    'productId': '100007386276',
    'score': 0,
    'sortType': 5,
    'pageSize': 10,
    'isShadowSku': 0,
    'page': 0,
}

# Captures the JSON body wrapped in the JSONP call; the callback name here
# must stay in sync with COMMENT_PARAMS['callback'].
JSONP_PAYLOAD = re.compile(r'(?<=fetchJSON_comment98\().*(?=\);)')

# Seconds to wait for a response; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10


def _fetch_sku_ids(keyword, pages=range(1, 2)):
    """Return the SKU ids found on the given search result pages.

    Args:
        keyword: Search term typed by the user.
        pages: Iterable of 1-based page indices; index ``i`` is sent to the
            site as ``page = 2 * i - 1``, exactly as the original script did.

    Returns:
        A list with the ``data-sku`` values of all requested pages, in order.

    Raises:
        requests.RequestException: If a search request fails or times out.
    """
    sku_ids = []
    for page in pages:
        # Passing the query via ``params`` lets requests URL-encode the
        # keyword, so characters such as '&' or '#' cannot break the URL.
        query = {"keyword": keyword, "enc": "utf-8", "page": page * 2 - 1}
        response = requests.get(
            SEARCH_URL,
            params=query,
            headers=SEARCH_HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        response.encoding = "utf-8"
        page_skus = etree.HTML(response.text).xpath(SKU_XPATH)
        print(page_skus)
        # Accumulate instead of overwriting, so earlier pages are not lost
        # when more than one page is requested.
        sku_ids.extend(page_skus)
    return sku_ids


def _extract_jsonp_payload(text):
    """Return the JSON text inside the JSONP wrapper, or None if absent."""
    match = JSONP_PAYLOAD.search(text)
    return match.group(0) if match else None


def _print_hot_tags(session, sku_id):
    """Fetch the comment summary of one SKU and print its hot tags.

    Prints ``name[count],`` for every tag followed by a newline. Any request
    or parsing failure is reported with the original error message and the
    SKU is skipped, so one bad product does not abort the whole run.
    """
    params = dict(COMMENT_PARAMS, productId=sku_id)
    try:
        text = session.get(
            COMMENT_URL, params=params, timeout=REQUEST_TIMEOUT
        ).text
    except requests.RequestException:
        print("出错了")
        return

    payload_text = _extract_jsonp_payload(text)
    if payload_text is None:
        print("出错了")
        return

    try:
        payload = json.loads(payload_text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        print("出错了")
        return

    # Tolerate a missing or null tag list instead of crashing on it.
    for tag in payload.get('hotCommentTagStatistics') or []:
        print('{}[{}]'.format(tag['name'], tag['count']), end=',')
    print()


def main():
    """Prompt for a keyword and print the hot review tags of each result."""
    keyword = input("请输入查询商品:")
    sku_ids = _fetch_sku_ids(keyword)
    # The context manager closes the pooled connections when done.
    with requests.Session() as session:
        for sku_id in sku_ids:
            _print_hot_tags(session, sku_id)


if __name__ == "__main__":
    main()

# References:
# https://www.zhihu.com/question/28981353?sort=created
# https://blog.csdn.net/weixin_42357472/article/details/83793386
