1. The spider (crawls a test page that echoes the request)
class ProxyrandomSpider(scrapy.Spider):
    """Minimal spider used to verify the proxy middleware.

    Fetches http://httpbin.org/get once and prints the response body so
    the outgoing IP (i.e. the proxy actually used) can be inspected.
    """

    name = 'proxyRandom'

    def start_requests(self):
        # Single seed request; the response is handled by parse().
        yield scrapy.Request('http://httpbin.org/get', callback=self.parse)

    def parse(self, response):
        # Dump the raw body to stdout for manual inspection.
        print(response.text)
2. Write the random-IP proxy middleware (middlewares.py)
import random
class IpRandomProxyMiddleware(object):
    """Downloader middleware that routes each request through a random proxy."""

    # Pool of candidate proxy endpoints, each as "host:port".
    proxy = [
        '117.88.177.0:3000',
        '117.45.139.179:9006',
    ]

    def process_request(self, request, spider):
        # Pick a fresh proxy for every outgoing request and attach it
        # via request.meta, which Scrapy's downloader honours.
        chosen = random.choice(self.proxy)
        request.meta['proxy'] = 'http://' + chosen
3. Changes to settings.py (enable the new middleware)
# Register the random-proxy middleware and disable the project's own
# proxy middleware (a value of None turns a middleware off in Scrapy).
DOWNLOADER_MIDDLEWARES = {
    # 200: priority within the downloader-middleware chain.
    'proxy.middlewares.IpRandomProxyMiddleware': 200,
    'proxy.middlewares.ProxyDownloaderMiddleware': None,
}