Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
packtpublishing
GitHub Repository: packtpublishing/machine-learning-for-algorithmic-trading-second-edition
Path: blob/master/03_alternative_data/01_opentable/opentable/settings.py
2929 views
1
# -*- coding: utf-8 -*-
2
3
# Project identity: the bot name plus the package that holds the spiders
# (also used as the target package for newly generated spiders).
BOT_NAME = "opentable"

SPIDER_MODULES = ["opentable.spiders"]
NEWSPIDER_MODULE = "opentable.spiders"

# Address of the Splash rendering service (a local instance on port 8050).
SPLASH_URL = "http://localhost:8050/"
9
10
# Downloader middlewares, keyed by import path; the integer is the ordering
# value Scrapy uses to sequence them.
DOWNLOADER_MIDDLEWARES = {
    # scrapy-splash integration (cookie handling + Splash request routing).
    "scrapy_splash.SplashCookiesMiddleware": 723,
    "scrapy_splash.SplashMiddleware": 725,
    "scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware": 810,
    # Left disabled, as in the original file. Mapping Scrapy's built-in
    # user-agent middleware to None would switch it off entirely:
    # "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": None,
    # Rotates the User-Agent header; presumably draws from USER_AGENT_LIST
    # -- verify against the random_useragent package.
    "random_useragent.RandomUserAgentMiddleware": 400,
}
17
18
# Spider middlewares, keyed by import path; the integer is the ordering value
# Scrapy uses to sequence them.
SPIDER_MIDDLEWARES = {
    # Project-local spider middleware.
    "opentable.middlewares.OpentableSpiderMiddleware": 543,
    # scrapy-splash helper for de-duplicating repeated Splash arguments.
    "scrapy_splash.SplashDeduplicateArgsMiddleware": 100,
}
22
23
# Splash-aware replacements for Scrapy's duplicate filter and HTTP cache
# storage backend, both provided by scrapy-splash.
DUPEFILTER_CLASS = "scrapy_splash.SplashAwareDupeFilter"
HTTPCACHE_STORAGE = "scrapy_splash.SplashAwareFSCacheStorage"

# Respect the rules published in each site's robots.txt.
ROBOTSTXT_OBEY = True
29
30
# --- Request rate and concurrency --------------------------------------------
# Maximum concurrent requests performed by Scrapy (default: 16). Template:
# CONCURRENT_REQUESTS = 32

# Delay between requests to the same website (default: 0).
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also the AutoThrottle settings and docs.
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Cookies are enabled by default; uncomment to disable them.
# COOKIES_ENABLED = False

# The Telnet console is enabled by default; uncomment to disable it.
# TELNETCONSOLE_ENABLED = False

# Template for overriding the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'en',
# }

# Active setting: keep at most one request in flight at any time.
CONCURRENT_REQUESTS = 1
54
# --- Extensions --------------------------------------------------------------
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# --- Item pipelines ----------------------------------------------------------
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
# ITEM_PIPELINES = {
#     'opentable.pipelines.OpentablePipeline': 300,
# }

# --- AutoThrottle extension (disabled by default) ----------------------------
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# Initial download delay:
# AUTOTHROTTLE_START_DELAY = 1
# Maximum download delay to be set in case of high latencies:
# AUTOTHROTTLE_MAX_DELAY = 10
# Average number of requests Scrapy should be sending in parallel to each
# remote server:
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0

# Show throttling stats for every response received.
# NOTE(review): AUTOTHROTTLE_ENABLED appears only as a commented-out template
# above, so this flag presumably has no effect unless AutoThrottle is switched
# on elsewhere -- confirm.
AUTOTHROTTLE_DEBUG = True
78
79
# --- HTTP caching (disabled by default) --------------------------------------
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# --- Project extensions (currently switched off) -----------------------------
# EXTENSIONS = {
#     'opentable.extensions.MonitorDownloadsExtension': 100,
#     'opentable.extensions.DumpStatsExtension': 101,
#     'scrapy.extensions.logstats.LogStats': 500,
# }

# --- Logging -----------------------------------------------------------------
# Verbose logging, written to a file instead of the console.
LOG_LEVEL = "DEBUG"
LOG_FILE = "spider.log"

# Path to the text file of user-agent strings; presumably consumed by
# random_useragent.RandomUserAgentMiddleware -- verify against that package.
USER_AGENT_LIST = "opentable/user_agents.txt"
97
98