Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
wiseplat
GitHub Repository: wiseplat/python-code
Path: blob/master/ invest-robot-contest_tinvest_robot-master/tinvest_robot_perevalov/news_fetcher.py
5932 views
1
import feedparser
2
3
from tinvest_robot_perevalov import _db
4
from tinvest_robot_perevalov.sentiment_analyzer import SentimentAnalyzer
5
6
import csv
7
import urllib.request
8
import os
9
10
import logging
11
12
# Log record layout: timestamp, logger name, severity, then the message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Configure the root logger once at import time, at INFO level.
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
17
18
def _init_sentiment_analyzer() -> SentimentAnalyzer:
    """Internal method for initializing sentiment analyzer. To be extended in the future.

    The model name is read from the ``SENTIMENT_MODEL`` environment variable,
    falling back to ``cardiffnlp/twitter-roberta-base-sentiment`` when the
    variable is unset or empty. The label names are downloaded from the
    tweeteval mapping file: every row with at least two tab-separated columns
    contributes its second column as a label.

    Returns:
        SentimentAnalyzer: Initialized sentiment analyzer

    Raises:
        OSError: If the label mapping cannot be downloaded (this includes
            ``urllib.error.URLError`` and a timed-out connection).
    """
    MODEL = os.getenv('SENTIMENT_MODEL') or "cardiffnlp/twitter-roberta-base-sentiment"
    THRESHOLD = 0.1  # passed through to SentimentAnalyzer as its second argument
    DOWNLOAD_TIMEOUT = 30  # seconds

    logger.info("Downloading labels...")

    mapping_link = "https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt"
    # Bound the request: without a timeout a stalled connection would block
    # start-up indefinitely.
    with urllib.request.urlopen(mapping_link, timeout=DOWNLOAD_TIMEOUT) as f:
        html = f.read().decode('utf-8').split("\n")

    csvreader = csv.reader(html, delimiter='\t')
    # Rows with fewer than two columns (e.g. the trailing empty line) are skipped.
    labels = [row[1] for row in csvreader if len(row) > 1]

    logger.info("Initializing sentiment analyzer...")

    return SentimentAnalyzer(MODEL, THRESHOLD, labels)
39
40
def fetch_and_analyze(rss_feeds: list):
    """
    Fetch news from RSS feeds, analyze sentiment, and save to database

    For every entry of every feed, the entry title is looked up in the
    database; titles that are not stored yet are run through the sentiment
    analyzer, saved together with the predicted sentiment, and logged.
    Entries that have no title (or an empty one) are skipped.

    Args:
        rss_feeds (list): list of RSS feeds URLs to fetch
    """
    _db.init_db()
    sentiment_analyzer = _init_sentiment_analyzer()

    for feed in rss_feeds:
        for entry in feedparser.parse(feed).entries:
            # Use .get() rather than attribute access: an item without a
            # <title> would otherwise raise AttributeError and abort the
            # processing of all remaining entries and feeds.
            title = entry.get('title')
            if not title:
                continue

            # Already stored titles are not analyzed again.
            if _db.check_if_exists(title):
                continue

            sentiment = sentiment_analyzer.predict_sentiment(title)
            _db.put_in_db(title, sentiment)
            logger.info(f"TEXT: {title} // SENTIMENT: {sentiment}")
57
58