特殊属性/方法
# 方式一: 直接解析start_urls中的网址,适合简单的GET请求
# ...
# URLs listed here are parsed directly (approach one); suited to simple GET requests.
start_urls = ['https://blog.scrapinghub.com/']
def parse(self, response):
    """Handle a response fetched for one of the URLs in ``start_urls``.

    Placeholder callback: the extraction logic is intentionally left out
    of this snippet, so the method currently does nothing and returns None.

    Args:
        response: The downloaded response handed to this callback.
    """
    # do something with response ...
# 方式二: 在start_requests函数中进行一些循环或传参
# ...
# 循环生成不同page的url,传给parse回调函数
def start_requests(self):
    """Yield one request per listing page, for pages 1 through 5.

    Each page URL is built from a shared template and every request is
    routed to ``self.parse``.
    """
    url_template = 'https://blog.scrapinghub.com/page/{}'
    first_page, last_page = 1, 5
    for page_number in range(first_page, last_page + 1):
        page_url = url_template.format(page_number)
        # callback defaults to self.parse; a different handler may be given.
        yield scrapy.Request(page_url, callback=self.parse)
def parse(self, response):
    """Handle a response produced by the requests from ``start_requests``.

    Placeholder callback: the extraction logic is intentionally left out
    of this snippet, so the method currently does nothing and returns None.

    Args:
        response: The downloaded response handed to this callback.
    """
    # do something with response ...
# ...
# POST方法传参
def start_requests(self):
    """Yield one form request per result page, for pages 1 through 10.

    Every request submits the same form fields to the same endpoint,
    differing only in the 'page' value, and is routed to ``self.parse``.
    """
    # The endpoint does not vary with the page, so name it once up front.
    url = 'http://hgmdtrial.biobase-international.com/hgmd/pro/browseGene.php'
    for page_number in range(1, 11):
        # The page number is sent as a string, like the other form values.
        form_fields = {'display': 'All', 'page': str(page_number), 'limit': '10'}
        yield FormRequest(url, formdata=form_fields, callback=self.parse)
# 变量的传递