抓取Switch遇到的问题

params 是 GET 请求的参数(requests 会把它拼接到 URL 的查询字符串中)。

data 是 POST 请求的参数(放在请求体中发送)。

提取数据时如果遇到列表,先用 [0] 取出其中的元素,再继续用键往下取:

tiqu = respones['results'][0]['hits']
for i in tiqu:

前两条数据的内容为空,不是字典,循环时不能直接按键取值,所以要先加上 if 判断:

dictyuanjiage = i['price']
if type(dictyuanjiage) == dict:
    yuanjia = dictyuanjiage['regPrice']

只加 if 判断会直接跳过价格数据为空的条目,这些条目的图片也不会被保存,导致抓取到的结果与官网的内容对不上。

解决办法:加上 else 分支,price 不是 dict 时也照常保存链接和图片:

else:
    img = i['productImage']
    lianjie = i['url']
    wanzhengtupian = 'https://assets.nintendo.com/image/upload/ar_16:9,b_auto:border,c_lpad/b_white/f_auto/q_auto/dpr_auto/c_scale,w_300/' + img
    wanzhenglianjie = 'https://www.nintendo.com/' + lianjie
    pinjie = baocundizhi + 'tu_%s' % n + '.' + '.jpg'

以下为完整代码


import csv
import os
import time

import requests

# Algolia multi-index query endpoint that every search page is POSTed to.
url = 'https://u3b6gr4ua3-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(4.13.0)%3B%20Browser%3B%20JS%20Helper%20(3.8.2)%3B%20react%20(17.0.2)%3B%20react-instantsearch%20(6.23.3)'

# JSON request body template. "&page=" is deliberately left without a value;
# build_page_body() fills in the page number for each request.
# NOTE(review): the first two fragments were broken across lines in the
# original text and have been rejoined as written. The live site may send
# highlight tags that contain "*" (e.g. "%5E*%5E%5E" / "%5E*") -- confirm
# against a captured request. The script never reads the highlighted fields.
body = (
    '{"requests":[{"indexName":"store_game_en_us","params":"highlightPreTag=%5E%5E%5E'
    '&highlightPostTag=%5E'
    '&filters='
    '&hitsPerPage=40'
    '&analytics=true'
    '&clickAnalytics=true'
    '&attributesToHighlight=%5B%22description%22%5D'
    '&maxValuesPerFacet=20'
    '&page='
    '&facets=%5B%22topLevelFilters%22%2C%22nsoFeatures%22%2C%22corePlatforms%22%2C%22availability%22%2C%22genres%22%2C%22editions%22%2C%22franchises%22%2C%22priceRange%22%2C%22esrbRating%22%2C%22playerCount%22%2C%22softwarePublisher%22%2C%22softwareDeveloper%22%5D'
    '&tagFilters='
    '&facetFilters=%5B%22corePlatforms%3ANintendo%20Switch%22%5D"}]}'
)

# Headers sent with both the search requests and the image downloads.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'Referer': 'https://www.nintendo.com/',
    'x-algolia-api-key': 'a29c6927638bfd8cee23993e51e721c9',
    'x-algolia-application-id': 'U3B6GR4UA3',
}

# Directory that downloaded images are written into.
baocundizhi = './Switch/'
# Number used in the first image file name; later images count up from it.
n = 1

CSV_PATH = '任天堂游戏信息.csv'
CSV_HEADER = ['名称', '原价', '折扣价', '购买链接', '图片链接']
IMAGE_URL_PREFIX = 'https://assets.nintendo.com/image/upload/ar_16:9,b_auto:border,c_lpad/b_white/f_auto/q_auto/dpr_auto/c_scale,w_300/'
PRODUCT_URL_PREFIX = 'https://www.nintendo.com/'
PAGE_COUNT = 26  # pages 0..25 are requested
REQUEST_TIMEOUT = 30  # seconds; requests waits forever when no timeout is given

# Every character listed here is replaced by "-" in a title.
# NOTE(review): the colon and parentheses are the full-width forms, exactly as
# in the original text. The three quote characters were reconstructed from
# typographically mangled source -- confirm the intended set.
TITLE_TRANSLATION = str.maketrans({ch: '-' for ch in "&:《》|[]/’()'\""})


def sanitize_title(title: str) -> str:
    """Return *title* with every character in TITLE_TRANSLATION replaced by '-'."""
    return title.translate(TITLE_TRANSLATION)


def build_page_body(page: int) -> str:
    """Return the request body for result page *page* (0-based)."""
    return body.replace('&page=', '&page=' + str(page))


def build_row(hit: dict) -> list:
    """Build the CSV row for one search hit.

    The row is [title, regular price, sale price, product URL, image URL].
    Both price columns are empty strings when ``hit['price']`` is not a dict,
    so hits without price data are still recorded.
    """
    price = hit['price']
    if isinstance(price, dict):
        regular, sale = price['regPrice'], price['salePrice']
    else:
        regular = sale = ''
    return [
        sanitize_title(hit['title']),
        regular,
        sale,
        PRODUCT_URL_PREFIX + hit['url'],
        IMAGE_URL_PREFIX + hit['productImage'],
    ]


def image_path(index: int) -> str:
    """Return the file path for image number *index*.

    The original built the name with ``'.' + '.jpg'``, which produced a double
    dot ("tu1..jpg"); a single dot is used here.
    NOTE(review): a standalone snippet of this code uses the prefix "tu_" and
    one of the two in-script occurrences was broken across lines -- confirm
    which prefix is intended.
    """
    return baocundizhi + 'tu%s' % index + '.jpg'


def fetch_hits(page: int) -> list:
    """POST the search request for *page* and return the hits of its first result.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
    """
    response = requests.post(
        url=url, headers=headers, data=build_page_body(page), timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()['results'][0]['hits']


def download_image(image_url: str, destination: str) -> bool:
    """Download *image_url* to *destination*; return True on success.

    The download completes before the file is opened, so a failed request no
    longer leaves an empty file behind, and the file is always closed.
    """
    try:
        response = requests.get(url=image_url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        with open(destination, mode='wb') as image_file:
            image_file.write(response.content)
    except (requests.RequestException, OSError):
        return False
    return True


def main() -> None:
    """Fetch every result page, append each hit to the CSV and save its image."""
    os.makedirs(baocundizhi, exist_ok=True)
    # The CSV is opened in append mode; write the header only into a new or
    # empty file so repeated runs do not add a header row each time.
    need_header = not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0
    index = n
    with open(CSV_PATH, 'a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        if need_header:
            csv_writer.writerow(CSV_HEADER)
        for page in range(PAGE_COUNT):
            for hit in fetch_hits(page):
                has_price = isinstance(hit['price'], dict)
                # Hits without a price dict are marked in the console output.
                tag = '' if has_price else 'nodict—'
                row = build_row(hit)
                csv_writer.writerow(row)
                # Flush per row so finished rows survive a later crash.
                f.flush()
                print(tag + row[0] + 'csv保存成功')
                if download_image(row[4], image_path(index)):
                    print(tag + row[0] + '图片保存成功!!!')
                else:
                    print("图片未加载")
                index += 1
                # Same pauses as before: 1 s after a priced hit, 0.5 s otherwise.
                time.sleep(1 if has_price else 0.5)


if __name__ == '__main__':
    main()