# 抓取 AJAX 请求方式的数据 (scraping data that is loaded through AJAX requests)
"""爬取豆瓣电影信息分析url:https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=0https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=20https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start=40"""from requests_html import HTMLSession,UserAgentimport jsonclass GetDbMovies: def __init__(self): self.session = HTMLSession() self.headers = { 'User-Agent':UserAgent().random, 'Cookie': 'bid=tE6HN3Ew1o4; douban-fav-remind=1; __utmc=30149280; ll="118172"; __utmc=223695111; __gads=ID=57783c64c61b95f0-222169cb8ece00a5:T=1635952049:RT=1635952049:S=ALNI_MY6y5oy9OjMOtd8IX0CuXxdSF4SXQ; _vwo_uuid_v2=D81A9B83EB7C344F9A07D617FD1AE4887|474257f8af7204c717a5353d945eb818; __yadk_uid=UhvBUj5FHIKYmDv4VhlKTrZDXf1zW5uS; __utmz=30149280.1641476156.11.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; push_doumail_num=0; push_noty_num=0; dbcl2="198087769:Fi8yZABpQu4"; ck=o8Sj; __utmv=30149280.19808; gr_user_id=d4f2576e-6cd8-4af1-b939-2c8645bbdd3c; __utmz=223695111.1641642199.10.3.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1641719347%2C%22https%3A%2F%2Fwww.douban.com%2F%22%5D; _pk_ses.100001.4cf6=*; ap_v=0,6.0; __utma=30149280.453644371.1620303194.1641641722.1641719347.14; __utmb=30149280.0.10.1641719347; __utma=223695111.1449280834.1635952050.1641642199.1641719347.11; __utmb=223695111.0.10.1641719347; Hm_lvt_16a14f3002af32bf3a75dfe352478639=1641720466; Hm_lpvt_16a14f3002af32bf3a75dfe352478639=1641720466; _pk_id.100001.4cf6=74019193f4d4d88d.1635952050.11.1641721330.1641643747.' 
} # 获取视频 def getMovies(self): for i in range(0,5): page_start = i * 20 url = f'https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=%E7%94%B5%E5%BD%B1&start={page_start}&year_range=2020,2020' response = self.session.get(url,headers=self.headers) # print(response.status_code) if response.status_code == 200: response.html.render() result = json.loads(response.text)['data'] for mv in result: title = mv['title'] url = mv['url'] casts = mv['casts'] cover = mv['cover'] print(f'电影名称:【{title}】,视频地址:{url},演出名单:{casts},封面图片:{cover}')obj = GetDbMovies()obj.getMovies()