特殊属性/方法
# Approach 1: parse the URLs listed in start_urls directly; suited to simple GET requests.
# ...
start_urls = ['https://blog.scrapinghub.com/']


def parse(self, response):
    """Process a downloaded response (placeholder body in these notes).

    Args:
        response: The response object handed to this callback.
    """
    # do something with response
    ...
# Approach 2: build the requests inside start_requests, e.g. to loop or pass parameters.
# ...
# Generate the URL of each page in a loop and hand it to the parse callback.
def start_requests(self):
    """Yield one request per blog page, for pages 1 through 5."""
    for page in range(1, 6):
        url = 'https://blog.scrapinghub.com/page/{}'.format(page)
        # callback defaults to self.parse; a different callback may be specified.
        yield scrapy.Request(url, callback=self.parse)


def parse(self, response):
    """Process a downloaded response (placeholder body in these notes).

    Args:
        response: The response object handed to this callback.
    """
    # do something with response
    ...


# ...
# Passing parameters with a POST request.
# NOTE: this is an alternative to the start_requests above; if both are kept
# in the same scope, this later definition replaces the earlier one.
# NOTE(review): FormRequest is presumably scrapy's FormRequest (its import is
# elided in these notes) -- confirm.
def start_requests(self):
    """Yield one form request per page, for pages 1 through 10."""
    for page in range(1, 11):
        # Form fields sent with the request; the page number goes in as a string.
        payload = {
            'display': 'All',
            'page': str(page),
            'limit': '10',
        }
        url = 'http://hgmdtrial.biobase-international.com/hgmd/pro/browseGene.php'
        yield FormRequest(url, formdata=payload, callback=self.parse)
# 变量的传递