Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
AI4Finance-Foundation
GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/meta/preprocessor/tusharedownloader.py
732 views
1
"""Contains methods and classes to collect data from
2
tushare API
3
"""
4
5
from __future__ import annotations
6
7
import pandas as pd
8
import tushare as ts
9
from tqdm import tqdm
10
11
12
class TushareDownloader:
    """Provides methods for retrieving daily stock data from the tushare API.

    Attributes
    ----------
    start_date : str
        start date of the data (modified from config.py)
    end_date : str
        end date of the data (modified from config.py)
    ticker_list : list
        a list of stock tickers (modified from config.py); only the first
        six characters of each ticker are passed to tushare

    Methods
    -------
    fetch_data()
        Fetches data from tushare API. The raw tushare history is
        documented as carrying:
            date: date
            open: opening price
            high: the highest price
            close: closing price
            low: lowest price
            volume: volume
            price_change: price change
            p_change: fluctuation
            ma5: 5-day average price
            ma10: 10-day average price
            ma20: 20-day average price
            v_ma5: 5-day average volume
            v_ma10: 10-day average volume
            v_ma20: 20-day average volume
        The derived columns (price_change ... v_ma20) are dropped.
    select_equal_rows_stock(df)
        Keeps only tickers whose row count is at least the mean row count.
    """

    # Derived indicator columns removed from the raw tushare history.
    _DERIVED_COLUMNS = (
        "price_change",
        "p_change",
        "ma5",
        "ma10",
        "ma20",
        "v_ma5",
        "v_ma10",
        "v_ma20",
    )

    def __init__(self, start_date: str, end_date: str, ticker_list: list):
        self.start_date = start_date
        self.end_date = end_date
        self.ticker_list = ticker_list

    def fetch_data(self) -> pd.DataFrame:
        """Fetches daily history for every ticker from tushare.

        Parameters
        ----------

        Returns
        -------
        `pd.DataFrame`
            One row per (date, tic), sorted by date then tic, with a fresh
            0..n-1 index. Contains `date` (formatted "%Y-%m-%d"), `tic`
            (first six characters of the ticker), `day` (day of week,
            monday = 0) and whatever price/volume columns tushare returned
            other than the dropped derived indicators.

        Raises
        ------
        ValueError
            If no ticker produced any rows.
        """
        frames = []
        for tic in tqdm(self.ticker_list, total=len(self.ticker_list)):
            code = tic[0:6]
            temp_df = ts.get_hist_data(
                code, start=self.start_date, end=self.end_date
            )
            # Skip tickers for which nothing usable came back instead of
            # failing on the column assignment below.
            if temp_df is None or temp_df.empty:
                continue
            # The date lives in the index; turn it into a column *before*
            # concatenating, otherwise ignore_index=True would discard it.
            temp_df = temp_df.reset_index(level="date")
            temp_df["tic"] = code
            frames.append(temp_df)

        if not frames:
            raise ValueError(
                "no data fetched from tushare for the given tickers/date range"
            )

        # Concatenate once rather than growing a frame inside the loop.
        data_df = pd.concat(frames, axis=0, ignore_index=True)
        data_df = self._tidy_history(data_df)

        print("Shape of DataFrame: ", data_df.shape)
        print(data_df)
        return data_df

    @staticmethod
    def _tidy_history(data_df: pd.DataFrame) -> pd.DataFrame:
        """Drop derived columns, add `day`, normalise `date`, drop NaNs, sort."""
        # Keyword form: the positional axis argument is not accepted by
        # recent pandas. Missing columns are ignored since the goal is only
        # to make sure they are absent.
        data_df = data_df.drop(
            columns=list(TushareDownloader._DERIVED_COLUMNS), errors="ignore"
        )
        parsed_dates = pd.to_datetime(data_df["date"])
        # create day of the week column (monday = 0)
        data_df["day"] = parsed_dates.dt.dayofweek
        # convert date to standard string format, easy to filter
        data_df["date"] = parsed_dates.dt.strftime("%Y-%m-%d")
        # drop missing data
        data_df = data_df.dropna()
        return data_df.sort_values(by=["date", "tic"]).reset_index(drop=True)

    def select_equal_rows_stock(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the tickers that have at least the mean number of rows.

        Parameters
        ----------
        df : pd.DataFrame
            frame with a `tic` column

        Returns
        -------
        `pd.DataFrame`
            The rows of `df` whose `tic` occurs at least as often as the
            average ticker does; row order and index are preserved.
        """
        counts = df.tic.value_counts()
        selected = counts.index[counts >= counts.mean()]
        return df[df.tic.isin(selected)]
109
110