多线程;多进程
把 for 循环改成多线程(multiprocessing.dummy 与 multiprocessing 接口相同,但底层使用的是线程而不是进程):

  import multiprocessing.dummy as mp
  import time


  # Work out how the loop body splits into independent tasks before
  # converting it to run on multiple threads.
  def one_loop(i):
      """Run one iteration of the former ``for`` loop.

      Prints the current Unix timestamp together with the task index ``i``.
      The pool calls this once for every element of the iterable handed
      to ``map``.
      """
      localtime = time.time()
      print("at time {} print {}".format(localtime, i))


  if __name__ == '__main__':
      # mp.Pool(num_threads) sets the number of worker threads explicitly;
      # called without an argument it defaults to the number of CPU cores.
      # ``map`` blocks until every task has finished, so leaving the
      # ``with`` block afterwards only releases the idle worker threads.
      with mp.Pool() as p:
          p.map(one_loop, range(10))

  # Sample output (the order differs between runs because the tasks
  # execute concurrently):
  # at time 1626055253.8186395 print 1
  # at time 1626055253.8086739 print 0
  # at time 1626055253.8186395 print 6
  # at time 1626055253.8186395 print 8
  # at time 1626055253.8186395 print 4
  # at time 1626055253.8186395 print 7
  # at time 1626055253.8186395 print 9
  # at time 1626055253.8186395 print 3
  # at time 1626055253.8186395 print 2
  # at time 1626055253.8186395 print 5

例子:多线程爬虫

这段代码不能直接运行(省略了 get_html、get_comic_image_urls、download_image 的实现,以及构造 myComicInfo 的部分),但是思路非常清晰。

  import requests
  import re
  import os
  import multiprocessing.dummy as mp

  # Work list shared by all worker threads; filled in by the main block.
  # myComicInfo = list<[comicURL, chapterName, imgUrls]>
  # imgUrls = list<url>
  myComicInfo = []


  def one_loop(idx):
      """Download every image of the chapter stored at ``myComicInfo[idx]``.

      The entry is read from the module-level ``myComicInfo`` list rather
      than passed in, so the pool only has to supply a single integer per
      task. (Extra arguments could instead be bound with
      ``functools.partial`` or supplied through ``Pool.starmap``.)

      Relies on ``get_html``, ``get_comic_image_urls`` and
      ``download_image``, which are not part of this listing.

      Images whose target file name already exists in the current
      directory are skipped.
      """
      entry = myComicInfo[idx]
      print(entry)
      comicURL = entry[0]
      comicHtml = get_html(comicURL)
      comicImgUrls = get_comic_image_urls(comicHtml)
      # Download the comic; file names are numbered from 1.
      comicName = entry[1]
      for page, imgUrl in enumerate(comicImgUrls, start=1):
          imgName = "{}-{}.jpg".format(comicName, page)
          if os.path.exists(imgName):
              print("{} exists".format(imgName))
          else:
              print("downloading {}".format(imgName))
              download_image(imgUrl, imgName)


  if __name__ == '__main__':
      # About 100 lines are omitted here: they populate myComicInfo.

      # Multi-threaded download.
      # ``map`` takes the worker function first and an iterable second;
      # each element of the iterable is passed to one_loop as its single
      # argument, here the integer index of one myComicInfo entry.
      # ``map`` blocks until every task has finished, and the ``with``
      # block then shuts the pool down even if a task raised.
      with mp.Pool() as p:
          p.map(one_loop, range(len(myComicInfo)))