信息校验型 - 《爬虫》

获取大众点评店铺示例
方法1
方法2
大众点评代码示例

获取大众点评店铺示例

方法1

在 Chrome 浏览器中查看并获取所需的请求校验信息（如请求头、Cookie）。

方法2

使用简单的抓包分析，获取所需的校验信息等；截图使用的是 Fiddler4 抓包工具。
备注：此处抓包的意义不大，因为请求的直接就是某个页面链接，而不是需要传递参数的接口链接。

备注：把获取到的请求校验信息复制到代码中的方式有很多。偷懒神器：https://zhuanlan.zhihu.com/p/56447124

大众点评代码示例

import requests
import urllib3
from lxml import etree
# Cookies sent with the listing-page request, copied from a captured browser
# request.
# NOTE(review): these are hard-coded session values (presumably including a
# login token); they will expire and should not be committed to a shared
# repository -- consider loading them from the environment or a local file.
cookies = {
    's_ViewType': '10',
    '_lxsdk_cuid': '179ad1b34f7c8-0ef2077a853153-f7f1939-1fa400-179ad1b34f7c8',
    '_lxsdk': '179ad1b34f7c8-0ef2077a853153-f7f1939-1fa400-179ad1b34f7c8',
    '_hc.v': '0b329e3f-dc18-ed51-8bf1-a8cd52e89967.1622106912',
    'ua': 'Vibes',
    'ctu': '5056724c60220d56836e417da179bfba44c1ac0c207dd2a3f5af25fbc965880f',
    'td_cookie': '2312361676',
    'cy': '14',
    'cye': 'fuzhou',
    'dplet': '1e1a7493bd0ecbd1f7ec5b8c6cf2a4a8',
    'dper': '146c52535101e1064cf611627e77de87a136ee2583902a603ec8c7ef790052b0820346eff54949511eb833ac4ebaebdd71a076e91c93439aaad7e41b555be134b5195e113c3fd73a67f790a4694f57a95addd1512b46db91711fe64754deec1a',
    'fspop': 'test',
    'll': '7fd06e815b796be3df069dec7836c3df',
    'Hm_lvt_602b80cf8079ae6591966cc70a3940e7': '1622769563,1623035602,1623130539,1623651956',
    'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7': '1623652129',
    # The captured value read '...63f^%^7C^%^7C95': the carets are Windows
    # cmd escaping left over from a "Copy as cURL (cmd)" export, not part of
    # the cookie. The real value is the URL-encoded form below.
    '_lxsdk_s': '17a0932b3d9-2fb-6c7-63f%7C%7C95',
}
# Request headers replayed from the captured browser request so the request
# carries the same header set as the original page load.
headers = {
    'Proxy-Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    # Desktop Chrome 91 user-agent string, split across lines for readability.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.101 Safari/537.36'
    ),
    # Accepted media types, one entry per line, joined back with commas.
    'Accept': ','.join([
        'text/html',
        'application/xhtml+xml',
        'application/xml;q=0.9',
        'image/avif',
        'image/webp',
        'image/apng',
        '*/*;q=0.8',
        'application/signed-exchange;v=b3;q=0.9',
    ]),
    'Accept-Language': 'en',
}
if __name__ == '__main__':
    # Fetch one Dianping listing page and print how many shop names were
    # extracted, followed by the first one.

    # With certificate verification turned off (verify=False), urllib3 still
    # emits InsecureRequestWarning; disable that warning explicitly.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    url = 'http://www.dianping.com/fuzhou/ch10/p2'
    # requests has no default timeout; without one a stalled connection would
    # block the script indefinitely.
    response = requests.get(
        url,
        headers=headers,
        cookies=cookies,
        verify=False,
        timeout=10,
    )
    # Fail with a clear HTTP error instead of parsing an error page.
    response.raise_for_status()
    # Parse the HTML text into an lxml element tree.
    element = etree.HTML(response.text)
    # Extract the title attribute of each shop link in the listing.
    res_list = element.xpath("//div[@class='shop-list J_shop-list shop-all-list']/ul/li/div[2]//div[@class='tit']/a/@title")
    print(len(res_list))  # 15 when the example was written
    if res_list:
        print(res_list[0])  # name of the first shop on the page
    else:
        # An empty result previously crashed with IndexError on res_list[0].
        print('No shops matched the XPath; the cookies may have expired or the page layout changed.')