from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import requests
from random import randint
from time import sleep
from pprint import pprint
def getPetDict(url):  # getPetDict: write pet names and image links into a dict
    petNameLinkDict = {}  # create an empty dict
    html = urlopen(url)
    bs = BeautifulSoup(html, 'lxml')  # parse the HTML
    petLinkRegex = re.compile(
        r'http://i1\.bagong\.cn/.*?jpg')  # regex pattern for the image links
    petNameRegex = re.compile(
        r'[一-龥]+')  # regex pattern for the pet names (Chinese characters)
    for link in bs.find_all('img'):
        petLink = link.get('src')  # get the image link
        petName = link.get('alt')  # get the pet name
        if petLink is None or petName is None:  # skip <img> tags missing src or alt
            continue
        if petLinkRegex.search(petLink) is not None:  # store only the links that match
            nameMatch = petNameRegex.search(petName)
            if nameMatch is not None:
                # key: pet name; value: image link
                petNameLinkDict[nameMatch.group()] = petLink
    return petNameLinkDict
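# The returned mapping looks roughly like this (placeholder values only, assuming
# the page's <img alt> text carries the Chinese pet name):
# {'<pet name>': 'http://i1.bagong.cn/...jpg', ...}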
def saveImage(petDict):  # save and name the images from the dict's key/value pairs
    imgIndex = 1
    for petName, imgURL in petDict.items():
        path = 'C:/Users/Chen/Pictures/pythonTest/' + str(imgIndex) + ' ' + \
            str(petName) + '.jpg'  # save location for the images; change it to suit your machine
        response = requests.get(imgURL)
        with open(path, "wb") as file:  # the with-block closes the file automatically
            file.write(response.content)
        sleep(randint(1, 3))  # pause 1-3 seconds between downloads
        imgIndex += 1
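# Note: open() raises FileNotFoundError if the destination folder does not exist.
# A minimal sketch (using the same example path as above, adjust to your machine)
# that creates the folder before calling saveImage:
#
#     from pathlib import Path
#     Path('C:/Users/Chen/Pictures/pythonTest/').mkdir(parents=True, exist_ok=True)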
# dogDict = getPetDict('https://www.bagong.cn/dog/')
# saveImage(dogDict)
catDict = getPetDict('https://www.bagong.cn/cat/')
# saveImage(catDict)
pprint(catDict)