python爬虫—京东 - 《编程之路》

获取京东的评论和搜索信息
# encoding: utf-8
# from bs4 import BeautifulSoup
import re, requests, json
# https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100007386276&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
from  lxml import etree
# Step 1: search JD for a user-supplied keyword and collect the SKU ids of
# the products listed on the result page(s).
keyword = input("请输入查询商品：")

# SKU ids gathered from every search page fetched below; consumed later by
# the comment-fetching step.
g = []

# The headers do not depend on the page, so build them once.
header = {
    # HTTP header names may not contain spaces; the previous
    # "upgrade - insecure - requests" was transmitted as a malformed field.
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
}

url = "https://search.jd.com/Search"

for i in range(1, 2):
    # Loop index i is sent as page=2*i-1 (i=1 -> page=1).
    # NOTE(review): presumably JD numbers half-pages, hence the odd values
    # -- confirm before widening the range.
    html = requests.get(
        url=url,
        # Let requests build the query string so characters such as '&',
        # '#' or '%' in the keyword are escaped instead of corrupting the URL.
        params={"keyword": keyword, "enc": "utf-8", "page": i * 2 - 1},
        headers=header,
        # Without a timeout an unresponsive server blocks the script forever.
        timeout=10,
    )
    # Force UTF-8 decoding rather than relying on the guessed charset.
    html.encoding = "utf-8"
    html = html.text
    newhtml = etree.HTML(html)

    # Every <li> under #J_goodsList carries its SKU id in data-sku.
    sku_id = newhtml.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')
    # NOTE(review): despite its name this repeats the SKU query above (looks
    # like a copy-paste slip) and the value is never used; replace with the
    # real product-name XPath or drop it.
    pro_name = newhtml.xpath('//*[@id="J_goodsList"]/ul/li/@data-sku')
    print(sku_id)

    # Accumulate rather than overwrite, so that widening the page range does
    # not silently discard the SKUs of all but the last page.
    g.extend(sku_id)
# Step 2: for every SKU id collected in `g`, query JD's comment endpoint and
# print the "hot comment tag" statistics as name[count] pairs on one line.
url = 'https://club.jd.com/comment/productPageComments.action'
data = {
    'callback': 'fetchJSON_comment98',
    'productId': '100007386276',  # placeholder; overwritten per product below
    'score': 0,
    'sortType': 5,
    'pageSize': 10,
    'isShadowSku': 0,
    'page': 0
}

# The endpoint answers with JSONP: fetchJSON_comment98(<json>);
# This pattern captures only the <json> payload between the wrapper's
# parentheses. Compiled once because it is reused for every product.
jsonp_payload = re.compile(r'(?<=fetchJSON_comment98\().*(?=\);)')

# The context manager closes the session (and its pooled connections) when
# the loop finishes, which the bare requests.session() call never did.
with requests.Session() as s:
    for i in g:
        data['productId'] = i
        try:
            # A timeout keeps one stalled request from hanging the whole run.
            t = s.get(url, params=data, timeout=10).text
        except requests.RequestException:
            print("出错了")
            continue

        # No match means the response was not the expected JSONP wrapper
        # (e.g. an empty body or an error page); skip this product.
        found = jsonp_payload.search(t)
        if found is None:
            print("出错了")
            continue

        try:
            j = json.loads(found.group(0))
            commentSummary = j['hotCommentTagStatistics']
        except (json.JSONDecodeError, KeyError, TypeError):
            # Malformed JSON or a payload without the tag statistics is
            # handled like any other bad response instead of aborting the
            # remaining products.
            print("出错了")
            continue

        for comment in commentSummary:
            c_name = comment['name']
            c_count = comment['count']
            print('{}[{}]'.format(c_name, c_count), end=',')
        # Terminate the comma-separated line for this product.
        print()
https://www.zhihu.com/question/28981353?sort=created
https://blog.csdn.net/weixin_42357472/article/details/83793386