# -*- coding: utf-8 -*-
# middlewares.py
# author: Conyyon
"""Scrapy Downloader Middleware: RandomUserAgent.

Posted 2019-02-24 | Category: Study

Integrating a random User-Agent (UA) into Scrapy. This uses the ``UserAgent``
class provided by the fake-useragent package; install it with::

    pip install fake-useragent

Example ``settings.py``::

    # -*- coding: utf-8 -*-
    # author: Conyyon
    # Choose one in ['random', 'ie', 'opera', 'chrome', 'firefox', 'safari']
    # as USER_AGENT_TYPE (random by default)
    RANDOM_USER_AGENT = True
    # RANDOM_USER_AGENT_TYPE = 'chrome'
    DOWNLOADER_MIDDLEWARES = {
        'xxx.middlewares.RandomUserAgentMiddleware': 543,
    }

Author: Conyyon
Link: https://conyyon.ren/posts/48137/
Copyright: unless stated otherwise, all posts on this blog are licensed under
BY-NC-SA. Please credit the source when reposting.
"""


class RandomUserAgentMiddleware(object):
    """Downloader middleware that sets a generated ``User-Agent`` header.

    Behaviour is driven by two settings read in :meth:`from_crawler`:

    * ``RANDOM_USER_AGENT`` -- boolean switch, off by default.
    * ``RANDOM_USER_AGENT_TYPE`` -- name of the ``UserAgent`` attribute to
      read for every request, ``'random'`` by default.

    Attributes:
        ua_on: Whether requests get their ``User-Agent`` header replaced.
        ua_type: Attribute name looked up on ``ua`` for each request.
        ua: The ``fake_useragent.UserAgent`` instance, or ``None`` while the
            middleware is switched off (it is created on first use).
    """

    # Attribute used when the configured type cannot be resolved.
    FALLBACK_UA_TYPE = 'random'

    def __init__(self, random_ua_on, random_ua_type):
        """Store the configuration and build the UA source if enabled.

        Args:
            random_ua_on: Truthy to enable User-Agent replacement.
            random_ua_type: Attribute name to read from ``UserAgent``.
        """
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua_on = random_ua_on
        self.ua_type = random_ua_type
        # Skip building the UserAgent object when the feature is disabled.
        self.ua = self._create_user_agent() if random_ua_on else None

    @staticmethod
    def _create_user_agent():
        """Return a new ``fake_useragent.UserAgent`` instance."""
        # Imported here so the package is only required once the feature
        # is actually switched on.
        from fake_useragent import UserAgent
        return UserAgent()

    @classmethod
    def from_crawler(cls, clawler):
        """Build the middleware from the crawler's settings.

        Args:
            clawler: The crawler object; only its ``settings`` attribute is
                used. (The parameter name is kept as originally published.)

        Returns:
            A configured ``RandomUserAgentMiddleware`` instance.
        """
        settings = clawler.settings
        return cls(
            # getbool() so string values such as 'False' or '0' (for example
            # from ``-s RANDOM_USER_AGENT=False``) really disable the feature;
            # a plain get() would return a truthy non-empty string.
            random_ua_on=settings.getbool('RANDOM_USER_AGENT', False),
            random_ua_type=settings.get('RANDOM_USER_AGENT_TYPE', 'random'),
        )

    def process_request(self, request, spider):
        """Set ``request.headers['User-Agent']`` when the feature is on.

        If reading the configured type fails, the middleware switches itself
        to ``FALLBACK_UA_TYPE`` for this and all later requests.

        Args:
            request: The outgoing request; its ``headers`` mapping is updated.
            spider: The running spider; only ``spider.logger`` is used.

        Returns:
            None, in every case.
        """
        if not self.ua_on:
            # DEBUG rather than INFO: this runs once per request.
            spider.logger.debug('RANDOM_USER_AGENT: OFF')
            return None

        if self.ua is None:
            # ua_on was enabled after construction; create the source now.
            self.ua = self._create_user_agent()

        try:
            random_ua = getattr(self.ua, self.ua_type)
        except Exception as e:
            # Broad on purpose: the error raised for an unknown type is not
            # visible from this module -- TODO narrow once it is confirmed.
            spider.logger.warning(
                'RANDOM_USER_AGENT_TYPE %r failed (%s); '
                'Switching RANDOM_USER_AGENT_TYPE to %s',
                self.ua_type, e, self.FALLBACK_UA_TYPE,
            )
            self.ua_type = self.FALLBACK_UA_TYPE
            random_ua = getattr(self.ua, self.ua_type)

        request.headers['User-Agent'] = random_ua
        spider.logger.debug(
            'Set RANDOM_USER_AGENT Success (RANDOM_USER_AGENT_TYPE: %s)',
            self.ua_type,
        )
        return None