Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
AI4Finance-Foundation
GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/examples/FinRL_StockTrading_2026_1_data.py
1706 views
1
"""
2
Stock NeurIPS2018 Part 1. Data
3
4
This series is a reproduction of the paper "Deep reinforcement learning for automated stock trading: An ensemble strategy".
5
6
This part introduces how to use FinRL to fetch and process the data needed for ML/RL trading.
7
"""
8
9
import itertools
10
11
import pandas as pd
12
import yfinance as yf
13
14
from finrl import config_tickers
15
from finrl.config import INDICATORS, TRAIN_START_DATE, TRAIN_END_DATE, TRADE_START_DATE, TRADE_END_DATE
16
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
17
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
18
19
# %% Part 1. Fetch data - Single ticker

# Option A: call yfinance directly and show the first rows it returns.
aapl_df_yf = yf.download(
    tickers="aapl",
    start="2020-01-01",
    end="2020-01-31",
)
print("=== yfinance download ===")
print(aapl_df_yf.head())

# Option B: fetch the same ticker and date window through FinRL's
# YahooDownloader wrapper, so the two results can be compared side by side.
single_ticker_downloader = YahooDownloader(
    start_date="2020-01-01",
    end_date="2020-01-31",
    ticker_list=["aapl"],
)
aapl_df_finrl = single_ticker_downloader.fetch_data()
print("\n=== FinRL YahooDownloader ===")
print(aapl_df_finrl.head())
34
35
# %% Part 2. Fetch data - DOW 30 tickers

# Bind the ticker universe once; it is printed and then passed to the
# downloader unchanged.
dow_30_tickers = config_tickers.DOW_30_TICKER
print("\n=== DOW 30 Tickers ===")
print(dow_30_tickers)

# One download spanning the start of the training window through the end of
# the trading window; the train/trade split is applied later.
dow_downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
    ticker_list=dow_30_tickers,
)
df_raw = dow_downloader.fetch_data()
print("\n=== Raw data ===")
print(df_raw.head())
47
48
# %% Part 3. Preprocess data

# Configure the feature pipeline. The flags are passed to FeatureEngineer
# as-is; presumably they switch on technical indicators, VIX and turbulence
# features -- confirm against the FinRL documentation.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
processed = fe.preprocess_data(df_raw)

# Build the full (date, tic) grid: every calendar day between the first and
# last processed date, paired with every ticker seen in the processed frame.
list_ticker = processed["tic"].unique().tolist()
first_date = processed["date"].min()
last_date = processed["date"].max()
list_date = list(pd.date_range(first_date, last_date).astype(str))
combination = [(day, tic) for day in list_date for tic in list_ticker]

# Left-join the processed rows onto the grid so that missing (date, tic)
# pairs show up as rows of NaN instead of being absent.
date_tic_grid = pd.DataFrame(combination, columns=["date", "tic"])
processed_full = date_tic_grid.merge(processed, on=["date", "tic"], how="left")

# Keep only dates that actually occur in the processed data, order the rows
# by date then ticker, and replace the remaining gaps with 0.
on_known_dates = processed_full["date"].isin(processed["date"])
processed_full = (
    processed_full[on_known_dates]
    .sort_values(["date", "tic"])
    .fillna(0)
)

print("\n=== Processed data ===")
print(processed_full.head())
75
76
# %% Part 4. Split and save data

# Cut the processed frame into the training window and the trading window
# using the date boundaries imported from finrl.config.
train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)
print(f"\nTrain data length: {len(train)}")
print(f"Trade data length: {len(trade)}")

# Write each split to its CSV file in the current working directory, using
# the default to_csv settings (the index is written as the first column).
for split_frame, csv_name in ((train, "train_data.csv"), (trade, "trade_data.csv")):
    split_frame.to_csv(csv_name)
print("Data saved to train_data.csv and trade_data.csv")
86
87