代码

    1. import requests
    2. import csv
    3. import time
    4. import threading
    5. from queue import Queue
    class Producer(threading.Thread):
        """Worker thread that downloads listing pages and queues the parsed posts.

        Each instance repeatedly takes a page URL from ``Page_queue``, fetches
        it, and puts one dict per job post on ``data_queue``. Several producers
        may share the same pair of queues.
        """

        # Browser-like User-Agent header sent with every request.
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.50'
        }
        # Seconds to wait on the server before giving up, so that a stalled
        # connection cannot hang the thread (and the caller's join()) forever.
        request_timeout = 10

        def __init__(self, Page_queue, data_queue):
            """Remember the queues this worker reads from and writes to.

            Args:
                Page_queue: queue of page URLs still to be fetched.
                data_queue: queue that receives one dict per parsed post.
            """
            super().__init__()
            self.Page_queue = Page_queue
            self.data_queue = data_queue

        def run(self):
            """Fetch and parse page URLs until ``Page_queue`` is exhausted."""
            # Imported here because the file only imports ``Queue`` at the top.
            from queue import Empty

            while True:
                # A separate empty() check followed by a blocking get() races
                # with the other producers: the queue can be drained between
                # the two calls, leaving this thread blocked forever.
                # get_nowait() makes "take one or stop" a single atomic step.
                try:
                    url = self.Page_queue.get_nowait()
                except Empty:
                    break
                self.parse_data(url)

        def parse_data(self, url):
            """Download one page and queue a dict for every post it lists.

            Args:
                url: address of a page returning JSON with a ``Data.Posts`` list.

            Raises:
                requests.RequestException: on a network failure, a timeout, or
                    a non-2xx HTTP status.
            """
            response = requests.get(
                url, headers=self.headers, timeout=self.request_timeout
            )
            # Fail with a clear HTTP error instead of trying to parse an error page.
            response.raise_for_status()
            posts = response.json()['Data']['Posts']  # a list; visit each post in turn
            for post in posts:
                self.data_queue.put({
                    'Post': post['RecruitPostName'],
                    'Location': post['LocationName'],
                    'PostURL': post['PostURL'],
                })
    class Consumer(threading.Thread):
        """Worker thread that drains ``data_queue`` into the shared CSV file."""

        # Column order of the CSV file; also the keys written from each queued dict.
        header = ['Post', 'Location', 'PostURL']
        # The output file is opened (and the header row written) once, when this
        # class body executes; the handle and writer are shared by all instances.
        f = open('腾讯招聘职位.csv', 'w', encoding='utf-8', newline='')
        writer = csv.DictWriter(f, header)
        writer.writeheader()
        # Serialises access to the shared writer so that rows produced by
        # concurrently running consumers cannot interleave.
        _write_lock = threading.Lock()

        def __init__(self, data_queue):
            """Remember the queue to read from.

            Args:
                data_queue: queue of dicts keyed by the names in ``header``.
            """
            super().__init__()
            self.data_queue = data_queue

        def run(self):
            """Write queued rows to the CSV file until the queue is empty."""
            # Imported here because the file only imports ``Queue`` at the top.
            from queue import Empty

            try:
                while True:
                    # A separate empty() check followed by a blocking get()
                    # races with the other consumers and can block forever;
                    # get_nowait() takes an item or reports emptiness atomically.
                    try:
                        job = self.data_queue.get_nowait()
                    except Empty:
                        break
                    with self._write_lock:
                        self.writer.writerow(job)
            finally:
                # This class never closes the shared handle itself, so push
                # the buffered rows to disk once this worker is done.
                with self._write_lock:
                    if not self.f.closed:
                        self.f.flush()
    if __name__ == '__main__':
        # Script entry point: queue the page URLs, scrape them with three
        # producer threads, then write the collected posts with three consumers.
        Page_q = Queue()
        data_q = Queue()

        # Queue result pages 1 through 10 (pageSize=10 in the query string).
        for i in range(1, 11):
            page_url = f'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1651906031606&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={i}&pageSize=10&language=zh-cn&area=cn'
            Page_q.put(page_url)

        producer_lis = []
        for _ in range(3):
            t1 = Producer(Page_q, data_q)
            t1.start()
            producer_lis.append(t1)

        # Wait for every producer before starting any consumer: a consumer
        # stops as soon as it sees an empty data queue, so all rows must
        # already be queued by then.
        for j in producer_lis:
            j.join()

        consumer_lis = []
        try:
            for _ in range(3):
                t2 = Consumer(data_q)
                t2.start()
                consumer_lis.append(t2)
        finally:
            # Wait for the writers to finish, then close the shared CSV handle
            # so every buffered row is written out and the file is released.
            for t2 in consumer_lis:
                t2.join()
            Consumer.f.close()