获取京东的评论和搜索信息

    1. # encoding: utf-8
    2. # from bs4 import BeautifulSoup
    3. import re, requests, json
    4. # https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=100007386276&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1
    5. from lxml import etree
    # Search JD for a keyword, then print the "hot comment tag" statistics
    # (tag name and count) of every product found on the result page(s).

    SEARCH_URL = "https://search.jd.com/Search"
    COMMENT_URL = "https://club.jd.com/comment/productPageComments.action"
    SKU_XPATH = '//*[@id="J_goodsList"]/ul/li/@data-sku'

    # Number of search result pages to scan; the original loop was
    # range(1, 2), i.e. a single page.
    SEARCH_PAGES = 1

    # Seconds to wait for a response before giving up, so a stalled
    # connection cannot hang the script forever.
    REQUEST_TIMEOUT = 10

    HEADERS = {
        # Header names must not contain spaces; the previous
        # "upgrade - insecure - requests" was not a real header.
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
    }

    # Query parameters for the comment endpoint. "productId" is only a
    # placeholder here; it is replaced per product when a request is built.
    COMMENT_PARAMS = {
        "callback": "fetchJSON_comment98",
        "productId": "100007386276",
        "score": 0,
        "sortType": 5,
        "pageSize": 10,
        "isShadowSku": 0,
        "page": 0,
    }

    # Captures everything between "fetchJSON_comment98(" and the final ");".
    # re.S lets the payload span several lines.
    JSONP_PAYLOAD_RE = re.compile(r"(?<=fetchJSON_comment98\().*(?=\);)", re.S)


    def search_sku_ids(keyword, pages=SEARCH_PAGES):
        """Return the SKU ids listed on the JD search result pages for *keyword*.

        Each page's SKU list is printed as it is fetched (as the original
        script did) and the ids of all pages are accumulated in order.

        Args:
            keyword: Search text typed by the user.
            pages: How many result pages to scan.

        Returns:
            A list of SKU id strings.

        Raises:
            requests.RequestException: If a search request fails or times out.
        """
        sku_ids = []
        for page in range(1, pages + 1):
            response = requests.get(
                SEARCH_URL,
                # Passing the keyword through ``params`` URL-encodes it, so
                # characters such as "&" or "#" cannot corrupt the query.
                # The page numbers sent are the odd ones (1, 3, 5, ...), the
                # same ``i * 2 - 1`` values the original URL used.
                params={"keyword": keyword, "enc": "utf-8", "page": page * 2 - 1},
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            response.encoding = "utf-8"
            tree = etree.HTML(response.text)
            page_skus = tree.xpath(SKU_XPATH)
            print(page_skus)
            sku_ids.extend(page_skus)
        return sku_ids


    def extract_jsonp_payload(text):
        """Return the JSON text wrapped by the JSONP callback.

        Args:
            text: Raw response body of the comment endpoint.

        Returns:
            The text between ``fetchJSON_comment98(`` and the last ``);``,
            or ``None`` when *text* does not have that shape.
        """
        match = JSONP_PAYLOAD_RE.search(text)
        return match.group(0) if match else None


    def parse_hot_tags(text):
        """Parse a comment response into ``(name, count)`` pairs.

        Args:
            text: Raw JSONP response body of the comment endpoint.

        Returns:
            A list of ``(name, count)`` tuples taken from the
            ``hotCommentTagStatistics`` entry; empty when that entry is
            missing or null.

        Raises:
            ValueError: If *text* is not the expected JSONP wrapper, or the
                payload is not a JSON object.
            KeyError: If a tag entry lacks ``name`` or ``count``.
        """
        payload = extract_jsonp_payload(text)
        if payload is None:
            raise ValueError("response is not wrapped in the expected JSONP callback")
        data = json.loads(payload)  # json.JSONDecodeError is a ValueError
        if not isinstance(data, dict):
            raise ValueError("JSONP payload is not a JSON object")
        tags = data.get("hotCommentTagStatistics") or []
        return [(tag["name"], tag["count"]) for tag in tags]


    def format_tags(tags):
        """Render ``(name, count)`` pairs as ``name[count],`` joined together.

        Every entry, including the last, is followed by a comma, which is
        the exact output format of the original script.
        """
        return "".join("{}[{}],".format(name, count) for name, count in tags)


    def main():
        """Prompt for a keyword and print one line of hot tags per product."""
        keyword = input("请输入查询商品:")
        sku_ids = search_sku_ids(keyword)

        # The context manager closes the session's connections on exit.
        with requests.Session() as session:
            # Reuse the same browser-like headers as the search request.
            session.headers.update(HEADERS)
            for sku in sku_ids:
                try:
                    response = session.get(
                        COMMENT_URL,
                        # Build a fresh dict per request instead of mutating
                        # the shared template.
                        params=dict(COMMENT_PARAMS, productId=sku),
                        timeout=REQUEST_TIMEOUT,
                    )
                    tags = parse_hot_tags(response.text)
                except (requests.RequestException, ValueError, KeyError):
                    # One bad product must not abort the whole run.
                    print("出错了")
                    continue
                print(format_tags(tags))


    if __name__ == "__main__":
        main()

    e48e614593004a0bf315a7e953e055b.png

    https://www.zhihu.com/question/28981353?sort=created
    https://blog.csdn.net/weixin_42357472/article/details/83793386