1. import requests
    2. import threading
    3. from queue import Queue
    4. import csv
    class Producer(threading.Thread):
        """Worker thread that downloads listing pages and queues their postings.

        Each thread takes URLs from ``url_queue`` until none are left, requests
        the JSON document behind every URL and puts one row dict per posting
        onto ``data_queue``.
        """

        # Browser-like User-Agent header sent with every request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'
        }

        # Seconds to wait for the server; without a timeout a stalled
        # connection would block the thread indefinitely.
        request_timeout = 10

        def __init__(self, url_queue, data_queue):
            """Store the queue of URLs to fetch and the queue that receives rows."""
            super().__init__()
            self.url_queue = url_queue
            self.data_queue = data_queue

        def run(self):
            """Fetch and parse URLs until ``url_queue`` is drained."""
            # Local import: the file-level import only brings in ``Queue``.
            from queue import Empty

            while True:
                try:
                    # A separate empty()/get() pair is racy: another producer
                    # may take the last URL in between and leave this thread
                    # blocked forever. A non-blocking get avoids that.
                    url = self.url_queue.get_nowait()
                except Empty:
                    break
                self.parse_page(url)

        def parse_page(self, url):
            """Download ``url`` and queue one row per posting in the JSON reply."""
            res = requests.get(url, headers=self.headers, timeout=self.request_timeout)
            self._enqueue_posts(res.json())

        def _enqueue_posts(self, result):
            """Put one row dict per entry of ``result['Data']['Posts']`` on the data queue."""
            # Treat a null posting list as "no postings" instead of failing on
            # iteration.
            posts = result['Data']['Posts'] or []
            for item in posts:
                # Build a fresh dict per posting: reusing a single dict would
                # make every queued row point at the same (last) posting.
                self.data_queue.put({
                    '职位': item['RecruitPostName'],
                    '地址': item['LocationName'],
                    '网址': item['PostURL'],
                })
    class Consumer(threading.Thread):
        """Worker thread that writes queued row dicts to the shared CSV file.

        The output file and its ``csv.DictWriter`` are class attributes, created
        once when the class is defined and shared by every Consumer thread.
        ``run`` returns as soon as ``data_queue`` is found empty, so consumers
        should only be started once all rows have been queued.
        """

        # Shared output file, opened in append mode when the class is defined.
        f = open('腾讯招聘.csv', 'a', encoding='utf-8-sig', newline='')
        writer = csv.DictWriter(f, fieldnames=['职位', '地址', '网址'])
        # In append mode a non-zero position means the file already has
        # content; writing the header again would leave a stray header row in
        # the middle of the data.
        if f.tell() == 0:
            writer.writeheader()

        # Serialises access to the shared writer across Consumer threads.
        _write_lock = threading.Lock()

        def __init__(self, data_queue):
            """Store the queue this thread reads row dicts from."""
            super().__init__()
            self.data_queue = data_queue

        def run(self):
            """Write rows from ``data_queue`` to the CSV until the queue is empty."""
            # Local import: the file-level import only brings in ``Queue``.
            from queue import Empty

            while True:
                try:
                    # Non-blocking get: with several consumers, an
                    # empty()/get() pair can block forever when another thread
                    # takes the last row in between the two calls.
                    row = self.data_queue.get_nowait()
                except Empty:
                    break
                with self._write_lock:
                    self.writer.writerow(row)

            # Push buffered rows to disk; the shared file itself stays open.
            with self._write_lock:
                self.f.flush()
    if __name__ == '__main__':
        # Work queues: page URLs waiting to be fetched, and parsed rows waiting
        # to be written out.
        url_queue, data_queue = Queue(), Queue()

        # Queue the listing pages 1 through 10.
        for page in range(1, 11):
            url_queue.put(
                f'https://careers.tencent.com/tencentcareer/api/post/Query?categoryId=&parentCategoryId=40001&&pageIndex={page}&pageSize=10&language=zh-cn&area=cn'
            )

        # Three producers fetch the pages; wait for all of them to finish
        # before any consumer is started.
        p_list = [Producer(url_queue, data_queue) for _ in range(3)]
        for worker in p_list:
            worker.start()
        for worker in p_list:
            worker.join()

        # Three consumers then drain the collected rows.
        c_list = [Consumer(data_queue) for _ in range(3)]
        for worker in c_list:
            worker.start()
        for worker in c_list:
            worker.join()