# middlewares.py
import random
from scrapy.exceptions import NotConfigured

class Spider01DownloaderMiddleware:
    def __init__(self, settings):
        # Proxy pool read from the PROXIES setting in settings.py
        self.proxies = settings.getlist('PROXIES')

    @classmethod
    def from_crawler(cls, crawler):
        if not crawler.settings.getbool('HTTPPROXY_ENABLED'):
            raise NotConfigured
        return cls(crawler.settings)

    def process_request(self, request, spider):
        # Attach a random proxy to any request that does not already have one
        if not request.meta.get('proxy') and request.url not in spider.start_urls:
            request.meta['proxy'] = random.choice(self.proxies)

    def process_response(self, request, response, spider):
        return response

    def process_exception(self, request, exception, spider):
        pass
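The middleware relies on Scrapy's standard per-request proxy mechanism: setting request.meta['proxy'] makes the downloader route that single request through the given proxy, and process_request() simply automates this for every request. A minimal illustration of the same mechanism, with a placeholder target URL and proxy address:

import scrapy

# Manual equivalent of what process_request() does for one request;
# both the URL and the proxy address below are placeholders.
req = scrapy.Request(
    'https://example.com/page',
    meta={'proxy': 'http://111.111.111.111:8080'},
)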
Next, open settings.py and configure the proxy pool:
# Proxy pool
#PROXIES = [
#    ''
#]
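For reference, a populated pool might look like the sketch below; the addresses are placeholders rather than working proxies, and each entry should be a full proxy URL including the scheme:

# Proxy pool (placeholder addresses; replace with your own proxies)
PROXIES = [
    'http://111.111.111.111:8080',
    'http://222.222.222.222:3128',
]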
Finally, enable the middleware in settings.py and set its priority:
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# Enable the middleware
DOWNLOADER_MIDDLEWARES = {
    "spider_01.middlewares.Spider01DownloaderMiddleware": 749,  # priority
}
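To confirm the middleware is actually applying proxies, a throwaway spider can log the proxy recorded on each response. The spider name and URLs below are placeholders, and the start URL is deliberately followed by a second request because process_request() skips URLs in start_urls:

import scrapy

class ProxyCheckSpider(scrapy.Spider):
    # Hypothetical spider used only to verify the proxy middleware.
    name = 'proxy_check'
    start_urls = ['https://httpbin.org/ip']

    def parse(self, response):
        # Start URLs are skipped by the middleware, so issue a follow-up request.
        yield scrapy.Request('https://httpbin.org/headers',
                             callback=self.check_proxy, dont_filter=True)

    def check_proxy(self, response):
        # request.meta carries over to response.meta, including the proxy set earlier
        self.logger.info('used proxy: %s', response.meta.get('proxy'))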