from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import requests
from random import randint
from time import sleep
from pprint import pprint
def getPetDict(url):  # getPetDict: collect pet names and image links into a dict
    petNameLinkDict = {}  # empty dict to hold the results
    html = urlopen(url)
    bs = BeautifulSoup(html, 'lxml')  # parse the HTML
    petLinkRegex = re.compile(
        r'http://i1\.bagong\.cn/.*?jpg')  # pattern for matching image links
    petNameRegex = re.compile(
        r'[一-龥]+')  # pattern for matching the pet name (Chinese characters)
    for link in bs.find_all('img'):
        petLink = link.get('src')  # image link
        petName = link.get('alt')  # pet name
        if petLink is None or petName is None:  # skip <img> tags missing src or alt
            continue
        if petLinkRegex.search(petLink) is not None:  # keep only links that match
            nameMatch = petNameRegex.search(petName)
            if nameMatch is not None:
                # key: pet name; value: image link
                petNameLinkDict[nameMatch.group()] = petLink
    return petNameLinkDict
def saveImage(petDict):  # save and name each image from the dict's key/value pairs
    imgIndex = 1
    for imgName, imgURL in petDict.items():
        path = 'C:/Users/Chen/Pictures/pythonTest/' + str(imgIndex) + ' ' + \
            str(imgName) + '.jpg'  # save path for the images; change it as needed
        response = requests.get(imgURL)
        with open(path, "wb") as file:  # the with block closes the file automatically
            file.write(response.content)
        sleep(randint(1, 3))  # short random pause between downloads
        imgIndex += 1
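# Optional sketch: saveImage assumes the target folder already exists, so creating it
# up front avoids a FileNotFoundError on a fresh machine. The path here simply mirrors
# the one hard-coded in saveImage; adjust both together.
# import os
# os.makedirs('C:/Users/Chen/Pictures/pythonTest/', exist_ok=True)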
# dogDict = getPetDict('https://www.bagong.cn/dog/')
# saveImage(dogDict)
catDict = getPetDict('https://www.bagong.cn/cat/')
# saveImage(catDict)
pprint(catDict)