# Fetch product comments and search results from JD.com (京东).
# encoding: utf-8
# from bs4 import BeautifulSoup
import re, requests, json
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100007386276&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
from lxml import etree
# Ask the user which product to search for on JD.com.
keyword = input("请输入查询商品:")

# SKU ids collected from every fetched search page; the comment-fetching
# code further down iterates over this list.
g = []

# Request headers are the same for every page, so build them once.
header = {
    # Header names must not contain spaces.
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
}

for i in range(1, 2):
    url = "https://search.jd.com/Search"
    # Let requests build the query string so that characters such as '&',
    # '#' or spaces in the user's keyword are percent-encoded instead of
    # corrupting the URL.
    query = {
        "keyword": keyword,
        "enc": "utf-8",
        # Loop index i is mapped to the odd page numbers 1, 3, 5, ...
        # NOTE(review): presumably this matches how JD numbers its search
        # result pages -- confirm before widening the range.
        "page": i * 2 - 1,
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url=url, params=query, headers=header, timeout=10)
    html.encoding = "utf-8"
    html = html.text
    newhtml = etree.HTML(html)

    # Every <li> under the element with id "J_goodsList" carries the SKU id
    # in its data-sku attribute.
    sku_id = newhtml.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')
    # NOTE(review): this evaluates the same XPath as sku_id, so it yields SKU
    # ids rather than product names, and nothing reads it afterwards. It
    # looks like a copy-paste placeholder; kept so the module-level name
    # still exists.
    pro_name = newhtml.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')
    print(sku_id)

    # Accumulate rather than overwrite, so that widening the page range
    # keeps the SKUs from every page instead of only the last one.
    g.extend(sku_id)
# One session is shared by all comment requests below.
s = requests.session()
url = 'https://club.jd.com/comment/productPageComments.action'
# Query parameters for the comment endpoint. 'productId' is only a
# placeholder here; it is overwritten with each SKU id inside the loop.
data = {
    'callback': 'fetchJSON_comment98',
    'productId': '100007386276',
    'score': 0,
    'sortType': 5,
    'pageSize': 10,
    'isShadowSku': 0,
    'page': 0
}
for i in g:
    data['productId'] = i
    try:
        # A timeout keeps one stalled request from blocking the whole run.
        t = s.get(url, params=data, timeout=10).text
    except requests.RequestException:
        # Network failure for this SKU: report it and try the next one.
        print("出错了")
        continue

    # The endpoint answers with JSONP: fetchJSON_comment98(<json>);
    # Keep only the JSON between the callback's parentheses. re.S lets '.'
    # cross newlines in case the payload is spread over several lines.
    m = re.search(r'(?<=fetchJSON_comment98\().*(?=\);)', t, re.S)
    if m is None:
        print("出错了")
        continue

    try:
        j = json.loads(m.group(0))
        # A null value is treated as "no tags".
        commentSummary = j['hotCommentTagStatistics'] or []
    except (ValueError, KeyError, TypeError):
        # Malformed JSON (JSONDecodeError is a ValueError), a missing key,
        # or a payload that is not a JSON object: skip this SKU instead of
        # aborting the run.
        print("出错了")
        continue

    # Print every tag as name[count], comma separated, one line per SKU.
    for comment in commentSummary:
        c_name = comment['name']
        c_count = comment['count']
        print('{}[{}]'.format(c_name, c_count), end=',')
    print()
# https://www.zhihu.com/question/28981353?sort=created
# https://blog.csdn.net/weixin_42357472/article/details/83793386