'''
Target URL: https://www.fabiaoqing.com/biaoqing/lists/page/1.html
Goal: scrape the first 10 pages of memes and save the images (45 memes per page).
Analysis of the pagination pattern:
Page 1: https://www.fabiaoqing.com/biaoqing/lists/page/1.html
Page 2: https://www.fabiaoqing.com/biaoqing/lists/page/2.html
Page 3: https://www.fabiaoqing.com/biaoqing/lists/page/3.html
...
'''
import os

import requests
from lxml import etree

# Step 1: collect the image URLs from the first 10 list pages.
imgs_url = []
for i in range(1, 11):
    url = f'https://www.fabiaoqing.com/biaoqing/lists/page/{i}.html'
    html = requests.get(url)
    html_list = etree.HTML(html.text)
    # Each meme sits in a div.tagbqppdiv; the real image URL is lazy-loaded
    # through the img tag's data-original attribute.
    links = html_list.xpath('.//div[@class="tagbqppdiv"]/a/img')
    for link in links:
        img_url = link.xpath('./@data-original')[0]
        imgs_url.append(img_url)

# Step 2: download every image into ./imgs1 (create the directory first).
path = os.path.join(os.getcwd(), 'imgs1')
os.makedirs(path, exist_ok=True)
for img_url in imgs_url:
    img_name = os.path.join(path, img_url[31:])  # the slice strips the fixed URL prefix, leaving the file name
    r = requests.get(img_url, stream=True)
    # if r.status_code == 200:  # with this line last night only 405 images were saved; removing it today got all 450
    with open(img_name, 'wb') as f:
        f.write(r.content)
    del r
print('All images saved!')
To sum up:
if r.status_code == 200:  # with this line in place last night, only 405 images were scraped; after removing it today, all 450 came down. Why?
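One plausible explanation (not verified against the site): roughly 45 of the image requests returned a status other than 200, for example 403/404 from rate limiting or removed images, so the guard silently skipped them; without the guard every response body is written to disk, although a file saved from an error response may not be a valid image. A minimal diagnostic sketch, assuming only the standard requests API (the helper name download_with_logging is made up for illustration), that records every status instead of silently skipping:

from collections import Counter
import os

import requests

def download_with_logging(img_urls, path):
    # Hypothetical helper: download images while counting every status code,
    # so skipped downloads are visible instead of silently dropped.
    statuses = Counter()
    for img_url in img_urls:
        r = requests.get(img_url, stream=True)
        statuses[r.status_code] += 1
        if r.status_code != 200:
            print(f'{r.status_code} for {img_url}')  # show why this one would have been skipped
            continue
        img_name = os.path.join(path, img_url[31:])
        with open(img_name, 'wb') as f:
            f.write(r.content)
    print(statuses)  # a hypothetical result like Counter({200: 405, 404: 45}) would explain the gap

Running this over the collected imgs_url would show whether the missing 45 were 404s, 403s, or something else entirely.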