Path: blob/master/03_alternative_data/01_opentable/opentable/settings.py
2929 views
# -*- coding: utf-8 -*-
"""Scrapy settings for the ``opentable`` project.

Active (uncommented) assignments configure the crawl. Commented-out
assignments are kept as ready-to-enable examples next to the documentation
links that describe them.
"""

BOT_NAME = 'opentable'

SPIDER_MODULES = ['opentable.spiders']
NEWSPIDER_MODULE = 'opentable.spiders'

# Address of the Splash HTTP endpoint used by the scrapy_splash components
# registered below.
SPLASH_URL = 'http://localhost:8050/'

# Downloader middlewares, keyed by import path; the integer is the ordering
# value Scrapy uses to position each middleware in the chain.
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
    # Left disabled: mapping the stock middleware to None would switch it off.
    # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'random_useragent.RandomUserAgentMiddleware': 400,
}

SPIDER_MIDDLEWARES = {
    'opentable.middlewares.OpentableSpiderMiddleware': 543,
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
}

DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'

# NOTE(review): HTTPCACHE_ENABLED is commented out further down, so this
# storage backend is presumably unused unless caching is enabled elsewhere.
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Issue requests strictly one at a time (the commented example above uses 32).
CONCURRENT_REQUESTS = 1

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# ITEM_PIPELINES = {
#     'opentable.pipelines.OpentablePipeline': 300,
# }

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 1
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 10
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# NOTE(review): AUTOTHROTTLE_ENABLED is commented out above, so this flag
# presumably has no effect until the extension is switched on -- confirm.
AUTOTHROTTLE_DEBUG = True

# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# EXTENSIONS = {
#     'opentable.extensions.MonitorDownloadsExtension': 100,
#     'opentable.extensions.DumpStatsExtension': 101,
#     'scrapy.extensions.logstats.LogStats': 500,
# }

LOG_LEVEL = 'DEBUG'
LOG_FILE = 'spider.log'

# File listing the user agents for random_useragent.RandomUserAgentMiddleware.
# NOTE(review): the path is relative -- presumably resolved against the
# directory the crawl is started from; confirm.
USER_AGENT_LIST = 'opentable/user_agents.txt'