Template 1: multiprocessing-only worker pool (processes drain a shared task queue)
import multiprocessing
import os
from queue import Empty


class ProcessClass(multiprocessing.Process):
    """Worker process that drains items from a shared multiprocessing queue.

    Each worker repeatedly pulls an item and hands it to :meth:`parse`;
    it exits once the queue is drained.
    """

    def __init__(self, queue):
        multiprocessing.Process.__init__(self)
        self.queue = queue  # multiprocessing.Queue shared with the parent

    def run(self):
        # NOTE: multiprocessing.Queue.empty() is unreliable across
        # processes — two workers can both see "non-empty" and then one
        # blocks forever in get() on a drained queue. Use EAFP instead:
        # try to fetch, and stop when the queue reports Empty.
        while True:
            try:
                item = self.queue.get_nowait()
            except Empty:
                break
            self.parse(item)

    def parse(self, item):
        """Handle one work item (here: just log which process got it)."""
        print('[{}]号进程:{}'.format(os.getpid(), item))
        # time.sleep(1)


def main():
    # The queue must be a multiprocessing.Queue: a plain queue.Queue
    # cannot be shared across processes.
    work_queue = multiprocessing.Queue()
    for i in range(20):
        work_queue.put(i)

    process_num = 4
    workers = []
    for _ in range(process_num):
        p = ProcessClass(work_queue)
        p.start()
        workers.append(p)
    for p in workers:
        p.join()


if __name__ == '__main__':
    main()
Template 2: multiprocessing + threading (processes handle list pages, threads handle extracted links — suits two-level site crawls)
import multiprocessing
import os
from queue import Empty, Queue
from threading import Thread


# Multi-process + multi-thread: suited for two-level site parsing
# (one process per list page, a small thread pool per page for the
# links extracted from it).

class ProcessClass(multiprocessing.Process):
    """Worker process: drains page items from a shared multiprocessing queue
    and fans each page's extracted links out to a thread pool."""

    def __init__(self, queue):
        multiprocessing.Process.__init__(self)
        self.queue = queue  # multiprocessing.Queue shared with the parent

    def run(self):
        # NOTE: multiprocessing.Queue.empty() is unreliable across
        # processes (racy), so fetch with EAFP and stop on Empty instead
        # of checking empty() before get().
        while True:
            try:
                item = self.queue.get_nowait()
            except Empty:
                break
            self.parse(item)

    def parse(self, item):
        """Parse one page: extract its links and process them with a small,
        properly shut-down thread pool."""
        print('[{}]号进程:{}'.format(os.getpid(), item))
        # Links extracted from the page (placeholder data here).
        url_list = ["123", "123", "123"]

        link_queue = Queue()
        thread_num = 4  # bounded pool; the original leaked 20 blocked daemon threads per page
        workers = []
        for _ in range(thread_num):
            w = Crawl(link_queue)
            w.daemon = True
            w.start()
            workers.append(w)

        for url in url_list:
            link_queue.put(url)
        link_queue.join()

        # Shut the workers down instead of leaving them blocked forever
        # on get() — one sentinel per worker, then join them.
        for _ in workers:
            link_queue.put(None)
        for w in workers:
            w.join()


class Crawl(Thread):
    """Worker thread: processes link URLs from a queue.Queue until it
    receives a ``None`` sentinel."""

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue  # queue.Queue of URLs (None = shutdown sentinel)

    def run(self):
        while True:
            q_url = self.queue.get()
            if q_url is None:
                # Sentinel: mark it done and exit the thread.
                self.queue.task_done()
                break
            try:
                self.parse(q_url)
            finally:
                # Always account for the item so queue.join() can't hang.
                self.queue.task_done()

    def parse(self, q_url):
        """Process one link (here: just log it)."""
        print("开始解析链接:", q_url)


def main():
    # The queue must be a multiprocessing.Queue: a plain queue.Queue
    # cannot be shared across processes.
    page_queue = multiprocessing.Queue()
    base_url = 'https://www.baidu.com/{}.html'
    for i in range(1, 20):
        page_queue.put(base_url.format(i))

    process_num = 4
    process_list = []
    for _ in range(process_num):
        p = ProcessClass(page_queue)
        p.start()
        process_list.append(p)
    for p in process_list:
        p.join()


if __name__ == '__main__':
    main()