Code
import requests
import csv
import time
import threading
from queue import Queue, Empty


class Producer(threading.Thread):
    """Worker thread: takes page URLs from Page_queue, fetches the Tencent
    careers API response, and pushes one dict per job posting onto data_queue.
    """

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.50'}

    def __init__(self, Page_queue, data_queue):
        super(Producer, self).__init__()
        self.Page_queue = Page_queue
        self.data_queue = data_queue

    def run(self):
        # Drain the URL queue until it is empty. get_nowait() avoids the
        # check-then-get race of `if empty(): break` + blocking get():
        # with several producers, a sibling thread can take the last URL
        # between our empty() check and our get(), leaving us blocked forever.
        while True:
            try:
                url = self.Page_queue.get_nowait()
            except Empty:
                break
            self.parse_data(url)

    def parse_data(self, url):
        """Fetch one result page and enqueue a dict for each posting.

        Assumes the API response JSON has the shape
        {'Data': {'Posts': [{'RecruitPostName': ..., 'LocationName': ...,
        'PostURL': ...}, ...]}} — matches what the original code indexed.
        """
        response = requests.get(url, headers=self.headers)
        # 'Data' -> 'Posts' is a list of postings; emit one row dict each.
        for post in response.json()['Data']['Posts']:
            self.data_queue.put({
                'Post': post['RecruitPostName'],
                'Location': post['LocationName'],
                'PostURL': post['PostURL'],
            })


class Consumer(threading.Thread):
    """Worker thread: drains data_queue and writes each posting as a CSV row."""

    header = ['Post', 'Location', 'PostURL']
    # One file/writer shared by every Consumer instance (class attributes,
    # created once at class-definition time). Closed explicitly in __main__
    # after the consumers are joined so the buffer is guaranteed to flush.
    f = open('腾讯招聘职位.csv', 'w', encoding='utf-8', newline='')
    writer = csv.DictWriter(f, header)
    writer.writeheader()

    def __init__(self, data_queue):
        super(Consumer, self).__init__()
        self.data_queue = data_queue

    def run(self):
        # Same non-blocking drain pattern as Producer.run: with multiple
        # consumers, empty()-then-get() can deadlock on the last item.
        while True:
            try:
                job = self.data_queue.get_nowait()
            except Empty:
                break
            # NOTE(review): csv writers are not documented as thread-safe;
            # this matches the original design — consumers only start after
            # all producers have finished, so rows are short and contention low.
            self.writer.writerow(job)


if __name__ == '__main__':
    Page_q = Queue()
    data_q = Queue()

    # Pages 1..10 of the Tencent careers query API.
    for i in range(1, 11):
        page_url = f'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1651906031606&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={i}&pageSize=10&language=zh-cn&area=cn'
        Page_q.put(page_url)

    producers = [Producer(Page_q, data_q) for _ in range(3)]
    for p in producers:
        p.start()
    for p in producers:
        p.join()  # wait until every page has been fetched and parsed

    consumers = [Consumer(data_q) for _ in range(3)]
    for c in consumers:
        c.start()
    for c in consumers:
        c.join()  # wait for all rows to be written before closing the file

    # Flush and close the shared CSV file; the original never closed it,
    # risking lost (unflushed) rows at interpreter shutdown.
    Consumer.f.close()
