import requests
from bs4 import BeautifulSoup
import csv
def read_page(url, headers, timeout=10):
    """Download a page and extract movie names and box-office figures.

    The response is decoded as GB2312 and parsed with BeautifulSoup's
    ``lxml`` parser.  The first ``<table>`` inside ``<div class="zhuti">``
    is scanned, skipping its first row (presumably the header row).  For
    each remaining row the movie name is the second text node of the
    second cell and the box-office figure is the first text node of the
    last cell.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per usable table row, each with the keys
        ``'name'`` and ``'piaofang'``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
        ValueError: If the page has no ``<div class="zhuti">`` or that
            div contains no ``<table>``.
    """
    # Without a timeout requests may block forever on a stalled server.
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error responses instead of parsing an error page.
    res.raise_for_status()
    res.encoding = 'gb2312'
    soup = BeautifulSoup(res.text, 'lxml')

    zhuti = soup.find('div', class_='zhuti')
    table = zhuti.find('table') if zhuti is not None else None
    if table is None:
        raise ValueError(
            f'no <table> inside <div class="zhuti"> found at {url}'
        )

    lst = []
    for tr in table.find_all('tr')[1:]:
        tds = tr.find_all('td')
        # Rows with fewer than two cells cannot carry a name column.
        if len(tds) < 2:
            continue
        name_strings = list(tds[1].strings)
        piaofang_strings = list(tds[-1].strings)
        # Skip rows whose cells lack the text nodes indexed below rather
        # than aborting the whole scrape with an IndexError.
        if len(name_strings) < 2 or not piaofang_strings:
            continue
        lst.append({
            'name': name_strings[1],
            'piaofang': piaofang_strings[0],
        })
    return lst
def write(lst, path='movies.csv'):
    """Write movie records to a CSV file with a header row.

    Args:
        lst: Iterable of dicts, each with the keys ``'name'`` and
            ``'piaofang'``.
        path: Destination file; defaults to ``'movies.csv'`` in the
            current working directory.  An existing file is overwritten.

    Raises:
        ValueError: If a dict contains a key other than ``'name'`` or
            ``'piaofang'`` (``csv.DictWriter``'s default behaviour).
        OSError: If the file cannot be opened for writing.
    """
    # 'utf-8-sig' prepends a BOM (presumably so spreadsheet tools detect
    # the encoding); newline='' lets the csv module control line endings.
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        wt = csv.DictWriter(f, fieldnames=['name', 'piaofang'])
        wt.writeheader()
        wt.writerows(lst)
# Page whose box-office table is scraped.
url = 'http://www.piaofang.biz/'

# Request headers: identify as desktop Chrome on Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/100.0.4896.75 Safari/537.36'
    ),
}

# Scrape the page, then dump the extracted rows to CSV.
lst = read_page(url, headers)
write(lst)