import scrapy
import json
from jdGoods.items import JdgoodsItem
from urllib.parse import urlencode
class JdgoodspiderSpider(scrapy.Spider):
    """Spider for JD's ad-search endpoint (search-x.jd.com).

    Requests the first five result pages for the keyword "外设" as JSONP,
    unwraps the JSONP envelope, and yields one JdgoodsItem per advertised
    product (truncated title + thumbnail image URL).
    """

    name = 'jdGoodSpider'

    def start_requests(self):
        # The callback= query parameter tells the endpoint to wrap its JSON
        # reply as jQuery5349492(...); parse() unwraps it below.
        start_url = 'https://search-x.jd.com/Search?callback=jQuery5349492&area=15&enc=utf-8&'
        for page in range(5):
            params2 = {
                'keyword': "外设",
                'adType': "7",
                'page': page,
                'ad_ids': '291:19',
                'xtest': 'new_search',
                '_': '1650809204018'
            }
            url2 = start_url + urlencode(params2)
            yield scrapy.Request(url=url2, callback=self.parse)

    def parse(self, response):
        """Unwrap the JSONP response and yield one item per product.

        NOTE: str.lstrip/str.rstrip treat their argument as a *set of
        characters*, not a prefix/suffix, so the original
        lstrip('jQuery5349492(') / rstrip(')') approach was fragile.
        Slicing between the outermost parentheses is the correct way to
        extract the JSON payload from a JSONP envelope.
        """
        text = response.text
        payload = text[text.index('(') + 1:text.rindex(')')]
        # '291' matches the ad_ids group requested in start_requests;
        # default to an empty list so a missing key yields no items
        # instead of raising KeyError.
        all_goods = json.loads(payload).get('291', [])
        for good in all_goods:
            item = JdgoodsItem()
            # Keep only the first 10 characters of the ad title.
            item['goodName'] = good['ad_title'][0:10]
            # image_url is relative; prepend JD's image CDN host.
            item['imgPath'] = "https://img10.360buyimg.com/n7/" + good['image_url']
            yield item
2、items.py文件
class JdgoodsItem(scrapy.Item):
    """Container for one scraped JD product."""

    # Truncated (first 10 chars) ad title of the product.
    goodName = scrapy.Field()
    # Absolute URL of the product thumbnail image.
    imgPath = scrapy.Field()
3、管道文件pipeline.py
class ImagesSavePipeline(ImagesPipeline):
    """Image pipeline that saves each product thumbnail under the product name.

    Overrides get_media_requests() to issue one download request per item
    (carrying the product name in request.meta), and file_path() to name
    the stored file after that product.
    """

    # Characters that are illegal in Windows filenames ('/' is also the
    # POSIX path separator) — scraped titles must not leak these into paths.
    _ILLEGAL_CHARS = '\\/:*?"<>|'

    def file_path(self, request, response=None, info=None):
        # Product titles come from scraped data and may contain path
        # separators or other characters invalid in filenames; strip them
        # so the image is always written as a flat, valid filename.
        name = request.meta['name']
        safe_name = ''.join(ch for ch in name if ch not in self._ILLEGAL_CHARS)
        return safe_name + '.jpg'

    def get_media_requests(self, item, info):
        # Request the image download and pass the product name along so
        # file_path() can use it for the saved filename.
        yield scrapy.Request(item['imgPath'], meta={'name': item['goodName']})
4、设置settings.py文件
# Enable the custom image pipeline (lower number = runs earlier).
ITEM_PIPELINES = {
    'jdGoods.pipelines.ImagesSavePipeline': 300,  # enable the image pipeline
    # 'jdGoods.pipelines.JdgoodsPipeline': None,
}
IMAGES_STORE = './images'  # Root dir for downloaded images; this setting name is fixed by Scrapy's ImagesPipeline.