多线程;多进程
将 for 循环改为多线程(注意:multiprocessing.dummy 提供的是基于线程的 Pool 接口,并不是多进程):
import multiprocessing.dummy as mp
import time
# 在多线程前要思考设计好循环架构
def one_loop(i):
    """Print the current timestamp together with the task index ``i``.

    Returns ``None``; the only effect is one line written to stdout.
    """
    # time.time() yields seconds since the epoch (a float), not a local-time
    # struct, hence the neutral name.
    now = time.time()
    print(f"at time {now} print {i}")
if __name__ == '__main__':
    # mp.Pool(num_workers): called without an argument, the pool size
    # defaults to the number of CPU cores.
    # map() blocks until every item has been processed; the context manager
    # then shuts the pool down so its worker threads are not left running.
    with mp.Pool() as pool:
        pool.map(one_loop, range(10))
# 结果
# at time 1626055253.8186395 print 1
# at time 1626055253.8086739 print 0
# at time 1626055253.8186395 print 6
# at time 1626055253.8186395 print 8
# at time 1626055253.8186395 print 4
# at time 1626055253.8186395 print 7
# at time 1626055253.8186395 print 9
# at time 1626055253.8186395 print 3
# at time 1626055253.8186395 print 2
# at time 1626055253.8186395 print 5
例子:多线程爬虫
这个代码不能直接运行(get_html、get_comic_image_urls、download_image 的定义以及 myComicInfo 的构造过程都被省略了),但是思路非常清晰。
import requests
import re
import os
import multiprocessing.dummy as mp
def one_loop(idx):
    """Download every page image of the chapter stored at ``myComicInfo[idx]``.

    ``myComicInfo`` is a module-level list whose entries have the layout
    ``[comicURL, chapterName, imgUrls]``. It is read as a global instead of
    being passed in, because ``Pool.map`` hands each call a single argument
    (here: the index).

    Images are saved as ``"<chapterName>-<page>.jpg"`` with pages numbered
    from 1; a file that already exists on disk is skipped.
    """
    # Only read access is needed, so no ``global`` statement is required.
    comic_info = myComicInfo[idx]
    print(comic_info)

    comic_url = comic_info[0]
    comic_html = get_html(comic_url)
    img_urls = get_comic_image_urls(comic_html)

    # Download the comic pages.
    chapter_name = comic_info[1]
    for page, img_url in enumerate(img_urls, start=1):
        img_name = f"{chapter_name}-{page}.jpg"
        if os.path.exists(img_name):
            # Already downloaded on a previous run; do not fetch it again.
            print(f"{img_name} exists")
            continue
        print(f"downloading {img_name}")
        download_image(img_url, img_name)
if __name__ == '__main__':
    # myComicInfo = list<[comicURL, chapterName, imgUrls]>
    # imgUrls = list<url>
    # (Roughly 100 lines that build myComicInfo are omitted here. The name is
    # assigned at module level, so no ``global`` statement is needed.)

    # Multi-threaded download.
    # Pool.map takes the worker function first and an iterable of its
    # arguments second; one_loop receives a single int, the index into
    # myComicInfo.
    # map() blocks until all downloads finish; the context manager then
    # shuts the pool down, including when a worker raised.
    with mp.Pool() as pool:
        pool.map(one_loop, range(len(myComicInfo)))