使用正则表达式爬取网页图片

使用正则表达式爬取网页图片

import re
import requests
# Matches <img src="/....jpg"> and captures only the site-relative path.
# [^"]* keeps the match inside the src attribute value, so a non-JPG image
# followed by another attribute that happens to end in .jpg" is not picked up.
IMG_SRC_PATTERN = re.compile(r'<img src="(/[^"]*\.jpg)"')
PAGE_URL = "http://www.tipdm.com/tipdm/index.html"
SITE_ROOT = "http://www.tipdm.com"  # prefix for the site-relative src paths
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely without a timeout


def extract_image_paths(html):
    """Return the site-relative paths of the JPG images referenced in *html*.

    Only ``<img src="/...jpg"`` tags are recognised (``src`` must be the
    first attribute and the path must start with ``/``). The result is a
    list of the captured paths, in document order, without the surrounding
    tag text or quotes.
    """
    return IMG_SRC_PATTERN.findall(html)


def download_images(page_url=PAGE_URL, site_root=SITE_ROOT):
    """Download every JPG image linked from *page_url* into the working dir.

    The page is fetched, its image paths are extracted and joined onto
    *site_root*, and each image is saved as ``1.jpg``, ``2.jpg``, ... where
    the number is the image's position on the page.

    Raises:
        requests.RequestException: if the index page itself cannot be
            fetched. A failure on an individual image is reported and that
            image is skipped, so one bad link does not abort the whole run.
    """
    page = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    paths = extract_image_paths(page.text)
    print(paths)

    for index, path in enumerate(paths, start=1):
        image_url = site_root + path
        print(image_url)
        try:
            image = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            # Without this check an HTTP error page would be saved as a .jpg.
            image.raise_for_status()
        except requests.RequestException as err:
            print("skipping %s: %s" % (image_url, err))
            continue
        file_name = str(index) + ".jpg"
        with open(file_name, "wb") as f:
            f.write(image.content)


if __name__ == "__main__":
    download_images()

学习笔记

Py爬虫

使用正则表达式爬取网页图片