Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
packtpublishing
GitHub Repository: packtpublishing/machine-learning-for-algorithmic-trading-second-edition
Path: blob/master/03_alternative_data/01_opentable/opentable/extensions.py
2929 views
1
import logging
import pprint

from scrapy import signals
from scrapy.exceptions import NotConfigured
from twisted.internet.task import LoopingCall
6
7
logger = logging.getLogger(__name__)
8
9
10
class _LoopingExtension:
    """Mixin for Scrapy extensions that run a task at a fixed interval.

    Subclasses call :meth:`setup_looping_task` from their constructor; the
    task then starts when the spider opens and stops when it closes.
    """

    def setup_looping_task(self, task, crawler, interval):
        """Schedule *task* to run every *interval* seconds while the spider is open."""
        self._interval = interval
        self._task = LoopingCall(task)
        # Tie the loop's lifetime to the spider via Scrapy's signal bus.
        crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_opened(self):
        # now=False: wait one full interval before the first invocation.
        self._task.start(self._interval, now=False)

    def spider_closed(self):
        # Guard: the loop may never have started (e.g. spider failed to open).
        if not self._task.running:
            return
        self._task.stop()
23
24
25
class MonitorDownloadsExtension(_LoopingExtension):
    """Periodically log the number of active downloads.

    Controlled by the ``MONITOR_DOWNLOADS_INTERVAL`` setting (seconds,
    default 10.0). Setting it to 0 disables the extension.
    """

    def __init__(self, crawler, interval):
        self.crawler = crawler
        self.setup_looping_task(self.monitor, crawler, interval)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from crawler settings; raise NotConfigured to disable."""
        interval = crawler.settings.getfloat("MONITOR_DOWNLOADS_INTERVAL", 10.0)
        if not interval:
            # Resolves the original "fixme": an interval of 0 now cleanly
            # disables the extension instead of being passed to LoopingCall.
            raise NotConfigured
        return cls(crawler, interval)

    def monitor(self):
        # Number of requests the engine is currently downloading.
        active_downloads = len(self.crawler.engine.downloader.active)
        # Lazy %-formatting: the message is only built if the record is emitted.
        logger.info("Active downloads: %d", active_downloads)
43
44
45
class DumpStatsExtension(_LoopingExtension):
    """Log Scrapy stats periodically, not only at the end of the crawl.

    Controlled by the ``DUMP_STATS_INTERVAL`` setting (seconds,
    default 60.0). Setting it to 0 disables the extension.
    """

    def __init__(self, crawler, interval):
        self.stats = crawler.stats
        self.setup_looping_task(self.print_stats, crawler, interval)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from crawler settings; raise NotConfigured to disable."""
        interval = crawler.settings.getfloat("DUMP_STATS_INTERVAL", 60.0)
        if not interval:
            # Resolves the original "fixme": an interval of 0 now cleanly
            # disables the extension instead of being passed to LoopingCall.
            raise NotConfigured
        return cls(crawler, interval)

    def print_stats(self):
        stats = self.stats.get_stats()
        # Lazy %-formatting: pformat output is only interpolated if emitted.
        logger.info("Scrapy stats:\n%s", pprint.pformat(stats))
58
59
@classmethod
60
def from_crawler(cls, crawler):
61
interval = crawler.settings.getfloat("DUMP_STATS_INTERVAL", 60.0)
62
# fixme: 0 should mean NotConfigured
63
return cls(crawler, interval)
64
65