# Path: blob/master/examples/FinRL_StockTrading_2026_1_data.py
# 1706 views
"""1Stock NeurIPS2018 Part 1. Data23This series is a reproduction of paper "Deep reinforcement learning for automated stock trading: An ensemble strategy".45Introduce how to use FinRL to fetch and process data that we need for ML/RL trading.6"""78import itertools910import pandas as pd11import yfinance as yf1213from finrl import config_tickers14from finrl.config import INDICATORS, TRAIN_START_DATE, TRAIN_END_DATE, TRADE_START_DATE, TRADE_END_DATE15from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split16from finrl.meta.preprocessor.yahoodownloader import YahooDownloader1718# %% Part 1. Fetch data - Single ticker1920# Using yfinance directly21aapl_df_yf = yf.download(tickers="aapl", start="2020-01-01", end="2020-01-31")22print("=== yfinance download ===")23print(aapl_df_yf.head())2425# Using FinRL's YahooDownloader26aapl_df_finrl = YahooDownloader(27start_date="2020-01-01",28end_date="2020-01-31",29ticker_list=["aapl"],30).fetch_data()31print("\n=== FinRL YahooDownloader ===")32print(aapl_df_finrl.head())3334# %% Part 2. Fetch data - DOW 30 tickers3536print("\n=== DOW 30 Tickers ===")37print(config_tickers.DOW_30_TICKER)3839df_raw = YahooDownloader(40start_date=TRAIN_START_DATE,41end_date=TRADE_END_DATE,42ticker_list=config_tickers.DOW_30_TICKER,43).fetch_data()44print("\n=== Raw data ===")45print(df_raw.head())4647# %% Part 3. 
Preprocess data4849fe = FeatureEngineer(50use_technical_indicator=True,51tech_indicator_list=INDICATORS,52use_vix=True,53use_turbulence=True,54user_defined_feature=False,55)5657processed = fe.preprocess_data(df_raw)5859list_ticker = processed["tic"].unique().tolist()60list_date = list(61pd.date_range(processed["date"].min(), processed["date"].max()).astype(str)62)63combination = list(itertools.product(list_date, list_ticker))6465processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(66processed, on=["date", "tic"], how="left"67)68processed_full = processed_full[processed_full["date"].isin(processed["date"])]69processed_full = processed_full.sort_values(["date", "tic"])70processed_full = processed_full.fillna(0)7172print("\n=== Processed data ===")73print(processed_full.head())7475# %% Part 4. Split and save data7677train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)78trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)79print(f"\nTrain data length: {len(train)}")80print(f"Trade data length: {len(trade)}")8182train.to_csv("train_data.csv")83trade.to_csv("trade_data.csv")84print("Data saved to train_data.csv and trade_data.csv")858687