# Standard library
import json
from urllib.parse import urlencode

# Third-party
import scrapy

# Project-local
from jdGoods.items import JdgoodsItem
    5. class JdgoodspiderSpider(scrapy.Spider):
    6. name = 'jdGoodSpider'
    7. def start_requests(self):
    8. start_url = 'https://search-x.jd.com/Search?callback=jQuery5349492&area=15&enc=utf-8&'
    9. for page in range(5):
    10. params2 = {
    11. 'keyword': "外设",
    12. 'adType': "7",
    13. 'page': page,
    14. 'ad_ids': '291:19',
    15. 'xtest': 'new_search',
    16. '_': '1650809204018'
    17. }
    18. url2 = start_url + urlencode(params2)
    19. yield scrapy.Request(url=url2,callback=self.parse)
    20. def parse(self, response):
    21. data = response.text.lstrip('jQuery5349492(')
    22. data = data.rstrip(')')
    23. all_goods = json.loads(data)['291']
    24. for good in all_goods:
    25. item = JdgoodsItem()
    26. item['goodName'] = good['ad_title'][0:10]
    27. good_img = "https://img10.360buyimg.com/n7/" + good['image_url']
    28. item['imgPath'] = good_img
    29. yield item

    2、items.py文件

    1. class JdgoodsItem(scrapy.Item):
    2. goodName = scrapy.Field()
    3. imgPath = scrapy.Field()

    3、管道文件pipeline.py

    1. class ImagesSavePipeline(ImagesPipeline):
    2. # 设置文件保存的名称 重写file_path()
    3. def file_path(self, request, response=None, info=None):
    4. file_name = request.meta['name'] + '.jpg'
    5. return file_name
    6. # 发送获取图片的网络请求 重写get_media_request()方法。
    7. def get_media_requests(self, item, info):
    8. # 发送网络请求并传递商品名称
    9. yield scrapy.Request(item['imgPath'],meta={'name':item['goodName']})

    4、设置settings.py文件

    1. ITEM_PIPELINES = {
    2. 'jdGoods.pipelines.ImagesSavePipeline': 300, # 启动管道
    3. # 'jdGoods.pipelines.JdgoodsPipeline': None,
    4. }
    5. IMAGES_STORE = './images' # 设置保存路径,这个名字是固定的。