import json
from urllib.parse import urlencode

import scrapy

from jdGoods.items import JdgoodsItem


class JdgoodspiderSpider(scrapy.Spider):
    """Fetch JD search-ad listings for a fixed keyword and emit one item per ad.

    Each request hits the ``search-x.jd.com`` endpoint, which answers with a
    JSONP document (``callback(<json>)``).  ``parse`` unwraps that document and
    yields a ``JdgoodsItem`` carrying a shortened title and the image URL.
    """

    name = 'jdGoodSpider'

    def start_requests(self):
        """Yield one search request for each page index in ``range(5)``."""
        start_url = 'https://search-x.jd.com/Search?callback=jQuery5349492&area=15&enc=utf-8&'
        for page in range(5):
            params2 = {
                'keyword': "外设",
                'adType': "7",
                'page': page,
                'ad_ids': '291:19',
                'xtest': 'new_search',
                '_': '1650809204018',
            }
            url2 = start_url + urlencode(params2)
            yield scrapy.Request(url=url2, callback=self.parse)

    @staticmethod
    def _unwrap_jsonp(text):
        """Return the JSON payload inside a ``callback(...)`` JSONP wrapper.

        Text that is already bare JSON (starts with ``{`` or ``[``) or that has
        no ``name(...)`` wrapper is returned unchanged apart from trimming
        surrounding whitespace and one trailing semicolon.
        """
        body = text.strip()
        if body.endswith(';'):
            body = body[:-1].rstrip()
        if body.endswith(')') and not body.startswith(('{', '[')):
            _, paren, rest = body.partition('(')
            if paren:
                # Drop only the single closing parenthesis of the wrapper.
                body = rest[:-1]
        return body

    def parse(self, response):
        """Decode the JSONP response and yield a ``JdgoodsItem`` per listed ad.

        The previous implementation used ``str.lstrip``/``str.rstrip`` with the
        callback text, which strip *character sets* rather than a prefix and
        suffix; that can remove leading payload characters and breaks on a
        trailing newline or semicolon.  The wrapper is now removed structurally.

        Raises:
            ValueError: if the unwrapped body is not valid JSON.
            KeyError: if the payload lacks the expected keys.
        """
        payload = json.loads(self._unwrap_jsonp(response.text))
        # '291' presumably corresponds to the slot id sent in 'ad_ids' -- TODO confirm.
        all_goods = payload['291']
        for good in all_goods:
            item = JdgoodsItem()
            # Only the first 10 characters of the title are kept as the name.
            item['goodName'] = good['ad_title'][0:10]
            item['imgPath'] = "https://img10.360buyimg.com/n7/" + good['image_url']
            yield item
2、items.py文件
class JdgoodsItem(scrapy.Item):
    """Container for one scraped product: a display name and an image URL."""

    # Product name assigned by the spider.
    goodName = scrapy.Field()
    # URL of the product image to download.
    imgPath = scrapy.Field()
3、管道文件pipelines.py
class ImagesSavePipeline(ImagesPipeline):
    """Image pipeline that stores each downloaded image under the product name.

    ``get_media_requests`` attaches the item's ``goodName`` to the download
    request, and ``file_path`` turns that name into ``<name>.jpg``.
    """

    # Path separators and characters that are invalid in file names on common
    # platforms are replaced with '_' so a scraped title cannot escape the
    # storage directory or produce an unwritable name.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path (relative to the image store) for a request.

        Overrides the default hashed name with the product name carried in
        ``request.meta['name']``.  The keyword-only ``item`` argument is
        accepted because newer Scrapy versions pass it to this hook.
        """
        safe_name = request.meta['name'].translate(self._UNSAFE_CHARS)
        return safe_name + '.jpg'

    def get_media_requests(self, item, info):
        """Yield the image download request for ``item``.

        The product name travels in ``meta`` so ``file_path`` can use it.
        """
        yield scrapy.Request(item['imgPath'], meta={'name': item['goodName']})
4、设置settings.py文件
# Pipelines to run, mapped to their order value.
ITEM_PIPELINES = {
    'jdGoods.pipelines.ImagesSavePipeline': 300,  # enable the image-saving pipeline
    # 'jdGoods.pipelines.JdgoodsPipeline': None,
}

# Directory where downloaded images are stored; the setting name itself is fixed.
IMAGES_STORE = './images'
