"""Fetch a few pages of one Weibo user's posts from the m.weibo.cn JSON API.

The distinctive part of this scraper is pagination: the ``since_id`` query
parameter for the next page is carried in the previous page's response, so it
has to be read from every response and stored before the next request.
"""
import time

import requests
from pyquery import PyQuery as pq

# Pagination cursor shared by get_page() (reads it) and parse_page() (updates
# it). Defined at module level so the functions also work when this file is
# imported rather than run as a script.
since_id = ''


def get_page(url):
    """Request one page of the user's feed and return the decoded JSON.

    The current module-level ``since_id`` is sent as the pagination cursor.

    Args:
        url: Endpoint to query.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network failure, timeout, or a body that is not valid JSON).
    """
    print('since_id----', since_id)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
        'Referer': 'https://m.weibo.cn/u/2830678474',
        'X-Requested-With': 'XMLHttpRequest',
    }
    params = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'since_id': since_id,
    }
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
        if response.status_code == 200:
            return response.json()
        print('Error', f'unexpected status code {response.status_code}')
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that cannot be decoded as JSON.
        print('Error', e)
    return None


def parse_page(json_data):
    """Yield one dict per post found in a page and advance the cursor.

    As a side effect the module-level ``since_id`` is replaced by the value
    found under ``data.cardlistInfo.since_id``; it becomes ``None`` when the
    response does not provide one. Because this is a generator, the update
    happens when iteration starts, not when the function is called.

    Args:
        json_data: Decoded response from get_page(); ``None`` or a response
            without ``data`` simply yields nothing.

    Yields:
        Dicts with the keys ``id``, ``created_at``, ``text`` (markup removed)
        and ``bid``.
    """
    global since_id
    data = (json_data or {}).get('data') or {}
    # The cursor is a property of the page, so read it once rather than on
    # every card.
    since_id = (data.get('cardlistInfo') or {}).get('since_id')
    for item in data.get('cards') or []:
        mblog = item.get('mblog')
        if not mblog:
            # Cards without a post payload have nothing to extract.
            continue
        raw_text = mblog.get('text')
        yield {
            'id': mblog.get('id'),
            'created_at': mblog.get('created_at'),
            # Only run the HTML parser when there is text to parse.
            'text': pq(raw_text).text() if raw_text else '',
            'bid': mblog.get('bid'),
        }


def run(page):
    """Fetch, parse and print a single page of posts.

    Args:
        page: 1-based page number, used only in the progress message.

    Returns:
        ``True`` when the page was fetched and a cursor for a following page
        is available, ``False`` otherwise.
    """
    url = 'https://m.weibo.cn/api/container/getIndex'
    json_data = get_page(url)
    if json_data is None:
        print('Error', f'no data received for page {page}')
        return False
    for result in parse_page(json_data):
        print(result)
    print(f'第{page}页完成')
    return bool(since_id)


if __name__ == '__main__':
    since_id = ''
    for page in range(3):
        # Stop early on a failed fetch or when no cursor came back; a request
        # without a cursor would presumably just return the first page again.
        if not run(page + 1):
            break