1. import requests
    2. from bs4 import BeautifulSoup
    3. import csv
    4. def read_page(url, headers):
    5. res = requests.get(url, headers=headers)
    6. res.encoding = 'gb2312'
    7. html = res.text
    8. soup = BeautifulSoup(html, 'lxml')
    9. zhuti = soup.find('div', class_='zhuti')
    10. table = zhuti.find('table')
    11. trs = table.find_all('tr')[1:]
    12. lst = []
    13. for tr in trs:
    14. info = {}
    15. tds = tr.find_all('td')
    16. if tds:
    17. name_td = tds[1]
    18. movie_name = list(name_td.strings)[1]
    19. piaofang_td = tds[-1]
    20. piaofang = list(piaofang_td.strings)[0]
    21. info['name'] = movie_name
    22. info['piaofang'] = piaofang
    23. lst.append(info)
    24. return lst
    25. def write(lst):
    26. with open('movies.csv', 'w', encoding='utf-8-sig', newline='') as f:
    27. wt = csv.DictWriter(f, fieldnames=['name', 'piaofang'])
    28. wt.writeheader()
    29. wt.writerows(lst)
    30. url = 'http://www.piaofang.biz/'
    31. headers = {
    32. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    33. }
    34. lst = read_page(url, headers)
    35. write(lst)