使用正则表达式爬取网页图片
import re

import requests

# Scrape the .jpg images referenced by <img> tags on the TipDM index page and
# save them into the current working directory as 1.jpg, 2.jpg, ...

# Captures only the root-relative path of <img src="/....jpg">.
# [^"\n] keeps the match inside one attribute value, so a non-jpg image that
# appears earlier on the same line cannot be glued onto a later ".jpg".
IMG_PATTERN = re.compile(r'<img src="(/[^"\n]*\.jpg)"')

PAGE_URL = "http://www.tipdm.com/tipdm/index.html"  # page scanned for images
SITE_ROOT = "http://www.tipdm.com"  # root the image paths are relative to
TIMEOUT = 10  # seconds; without a timeout requests.get can block forever


def find_image_paths(html):
    """Return the root-relative ``.jpg`` paths of the ``<img>`` tags in *html*.

    Paths are returned in page order; duplicates are kept.
    """
    # With a single capture group, findall yields the captured path directly,
    # so no prefix/suffix stripping of the matched tag text is needed.
    return IMG_PATTERN.findall(html)


def download_images(paths):
    """Fetch every path in *paths* from SITE_ROOT and write it to ``<n>.jpg``.

    ``n`` is the 1-based position of the path in *paths*. An image whose
    request fails (connection error, timeout, or HTTP error status) is
    reported and skipped, so an error page is never saved as an image.
    """
    for index, path in enumerate(paths, start=1):
        image_url = SITE_ROOT + path  # join site root and the img src path
        print(image_url)
        file_name = str(index) + ".jpg"
        try:
            response = requests.get(image_url, timeout=TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print("skipping {}: {}".format(image_url, exc))
            continue
        # Written relative to the current working directory.
        with open(file_name, "wb") as f:
            f.write(response.content)


def main():
    """Fetch the index page, list its image paths, and download each image.

    Raises:
        requests.RequestException: if the index page itself cannot be fetched
            or returns an HTTP error status.
    """
    response = requests.get(PAGE_URL, timeout=TIMEOUT)
    response.raise_for_status()
    paths = find_image_paths(response.text)
    print(paths)
    download_images(paths)


if __name__ == "__main__":
    main()