Path: blob/master/finrl/agents/portfolio_optimization/architectures.py
732 views
"""Policy network architectures for portfolio optimization (EIIE, EI3, GPM)."""

from __future__ import annotations

import numpy as np
import torch
from torch import nn
from torch_geometric.data import Batch
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv
from torch_geometric.nn import Sequential
from torch_geometric.utils import to_dense_batch


def _to_float_tensor(value, device):
    """Return ``value`` as a float tensor placed on ``device``.

    Args:
        value: Numpy array or torch tensor.
        device: Device the resulting tensor must live on.

    Returns:
        Float tensor on ``device``.
    """
    if isinstance(value, np.ndarray):
        value = torch.from_numpy(value)
    return value.to(device).float()


def _to_index_tensor(value, device):
    """Return ``value`` (array, list, tuple or tensor) as a tensor on ``device``.

    The dtype is left untouched; callers cast when they need a specific one.
    """
    return torch.as_tensor(value).to(device)


def _split_last_action(last_action):
    """Split the last action into the stock weights and the cash weight.

    Args:
        last_action: Tensor of shape [N, portfolio_size + 1] whose first
            column is the cash weight.

    Returns:
        Tuple ``(last_stocks, cash_bias)`` with shapes
        [N, 1, portfolio_size, 1] and [N, 1, 1, 1].
    """
    batch_size = last_action.shape[0]
    stocks = last_action.shape[1] - 1
    last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
    cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
    return last_stocks, cash_bias


def _temporal_branch(in_features, mid_features, out_features, first_kernel, last_kernel):
    """Build the two-layer temporal convolution block shared by all networks.

    Both kernels have height 1, so every asset (row) is processed
    independently; only the time axis is convolved.
    """
    return nn.Sequential(
        nn.Conv2d(
            in_channels=in_features,
            out_channels=mid_features,
            kernel_size=(1, first_kernel),
        ),
        nn.ReLU(),
        nn.Conv2d(
            in_channels=mid_features,
            out_channels=out_features,
            kernel_size=(1, last_kernel),
        ),
        nn.ReLU(),
    )


def _prepend_cash_bias(cash_bias, scores):
    """Prepend the cash bias to the asset scores and drop singleton axes.

    Args:
        cash_bias: Tensor of shape [N, 1, 1, 1].
        scores: Tensor of shape [N, 1, portfolio_size, 1].

    Returns:
        Tensor of shape [N, portfolio_size + 1].
    """
    logits = torch.cat([cash_bias, scores], dim=2)  # [N, 1, portfolio_size + 1, 1]
    logits = torch.squeeze(logits, 3)
    return torch.squeeze(logits, 1)


class EIIE(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_size=3,
        conv_mid_features=2,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EIIE (ensemble of identical independent evaluators) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_size: Size of first convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.48550/arXiv.1706.10059.
        """
        super().__init__()
        self.device = device

        # The second kernel spans whatever is left of the time axis after the
        # first convolution, collapsing it to length 1.
        n_size = time_window - k_size + 1

        self.sequential = _temporal_branch(
            initial_features, conv_mid_features, conv_final_features, k_size, n_size
        )

        # +1 input channel: the previous stock weights are appended as a feature.
        self.final_convolution = nn.Conv2d(
            in_channels=conv_final_features + 1, out_channels=1, kernel_size=(1, 1)
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input.

        Args:
            observation: Environment observation (numpy array or tensor) of
                shape [N, initial_features, portfolio_size, time_window].
            last_action: Last action performed by the agent (numpy array or
                tensor) of shape [N, portfolio_size + 1].

        Returns:
            Most favorable action, a tensor of shape [N, portfolio_size + 1].
        """
        observation = _to_float_tensor(observation, self.device)
        last_action = _to_float_tensor(last_action, self.device)

        last_stocks, cash_bias = self._process_last_action(last_action)
        # The cash score fed to the softmax is fixed at zero.
        cash_bias = torch.zeros_like(cash_bias)

        output = self.sequential(observation)  # [N, conv_final, portfolio_size, 1]
        output = torch.cat(
            [last_stocks, output], dim=1
        )  # [N, conv_final + 1, portfolio_size, 1]
        output = self.final_convolution(output)  # [N, 1, portfolio_size, 1]

        output = _prepend_cash_bias(cash_bias, output)  # [N, portfolio_size + 1]
        return self.softmax(output)

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array, singleton axes squeezed out).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        return _split_last_action(last_action)


class EI3(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EI3 (ensemble of identical independent inception) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1145/3357384.3357961.
        """
        super().__init__()
        self.device = device

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = _temporal_branch(
            initial_features, conv_mid_features, conv_final_features, k_short, n_short
        )

        self.mid_term = _temporal_branch(
            initial_features, conv_mid_features, conv_final_features, k_medium, n_medium
        )

        # Long-term branch: max over the whole time window, per feature/asset.
        self.long_term = nn.Sequential(
            nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU()
        )

        # Channels: short + medium conv outputs, pooled raw features, last stocks.
        self.final_convolution = nn.Conv2d(
            in_channels=2 * conv_final_features + initial_features + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input.

        Args:
            observation: Environment observation (numpy array or tensor) of
                shape [N, initial_features, portfolio_size, time_window].
            last_action: Last action performed by the agent (numpy array or
                tensor) of shape [N, portfolio_size + 1].

        Returns:
            Most favorable action, a tensor of shape [N, portfolio_size + 1].
        """
        observation = _to_float_tensor(observation, self.device)
        last_action = _to_float_tensor(last_action, self.device)

        last_stocks, cash_bias = self._process_last_action(last_action)
        # The cash score fed to the softmax is fixed at zero.
        cash_bias = torch.zeros_like(cash_bias)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        features = torch.cat(
            [last_stocks, short_features, medium_features, long_features], dim=1
        )
        output = self.final_convolution(features)  # [N, 1, portfolio_size, 1]

        output = _prepend_cash_bias(cash_bias, output)  # [N, portfolio_size + 1]
        return self.softmax(output)

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array, singleton axes squeezed out).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        return _split_last_action(last_action)


class GPM(nn.Module):
    def __init__(
        self,
        edge_index,
        edge_type,
        nodes_to_select,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        graph_layers=1,
        time_window=50,
        softmax_temperature=1,
        device="cpu",
    ):
        """GPM (Graph-based Portfolio Management) policy network initializer.

        Args:
            edge_index: Graph connectivity in COO format.
            edge_type: Type of each edge in edge_index.
            nodes_to_select: ID of nodes to be selected to the portfolio.
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            graph_layers: Number of graph neural network layers.
            time_window: Size of time window used as agent's state.
            softmax_temperature: Temperature parameter to softmax function.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1016/j.neucom.2022.04.105.
        """
        super().__init__()
        self.device = device
        self.softmax_temperature = softmax_temperature

        # Graph description is stored as plain attributes (not buffers), so it
        # is placed on ``device`` here and is not moved by ``Module.to``.
        self.edge_index = _to_index_tensor(edge_index, self.device).long()
        self.edge_type = _to_index_tensor(edge_type, self.device).long()
        self.nodes_to_select = _to_index_tensor(nodes_to_select, self.device)

        # Count relations on the converted tensor so that arrays, lists and
        # tensors on any device are all supported.
        num_relations = int(torch.unique(self.edge_type).numel())

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = _temporal_branch(
            initial_features, conv_mid_features, conv_final_features, k_short, n_short
        )

        self.mid_term = _temporal_branch(
            initial_features, conv_mid_features, conv_final_features, k_medium, n_medium
        )

        # Long-term branch: max over the whole time window, per feature/asset.
        self.long_term = nn.Sequential(
            nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU()
        )

        feature_size = 2 * conv_final_features + initial_features

        graph_layers_list = []
        for _ in range(graph_layers):
            graph_layers_list += [
                (
                    RGCNConv(feature_size, feature_size, num_relations),
                    "x, edge_index, edge_type -> x",
                ),
                nn.LeakyReLU(),
            ]

        self.gcn = Sequential("x, edge_index, edge_type", graph_layers_list)

        # Channels: temporal features + graph features + last stocks.
        self.final_convolution = nn.Conv2d(
            in_channels=2 * feature_size + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input.

        Args:
            observation: Environment observation (numpy array or tensor) of
                shape [N, initial_features, num_stocks, time_window].
            last_action: Last action performed by the agent (numpy array or
                tensor) of shape [N, portfolio_size + 1].

        Returns:
            Most favorable action, a tensor of shape [N, portfolio_size + 1].
        """
        observation = _to_float_tensor(observation, self.device)
        last_action = _to_float_tensor(last_action, self.device)

        last_stocks, cash_bias = self._process_last_action(last_action)
        # The cash score fed to the softmax is fixed at zero.
        cash_bias = torch.zeros_like(cash_bias)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        temporal_features = torch.cat(
            [short_features, medium_features, long_features], dim=1
        )  # shape [N, feature_size, num_stocks, 1]

        # add features to graph
        graph_batch = self._create_graph_batch(temporal_features, self.edge_index)

        # set edge type for the batch
        edge_type = self._create_edge_type_for_batch(graph_batch, self.edge_type)

        # perform graph convolution
        graph_features = self.gcn(
            graph_batch.x, graph_batch.edge_index, edge_type
        )  # shape [N * num_stocks, feature_size]
        graph_features, _ = to_dense_batch(
            graph_features, graph_batch.batch
        )  # shape [N, num_stocks, feature_size]
        graph_features = torch.transpose(
            graph_features, 1, 2
        )  # shape [N, feature_size, num_stocks]
        graph_features = torch.unsqueeze(
            graph_features, 3
        )  # shape [N, feature_size, num_stocks, 1]
        graph_features = graph_features.to(self.device)

        # concatenate graph features and temporal features
        features = torch.cat(
            [temporal_features, graph_features], dim=1
        )  # shape [N, 2 * feature_size, num_stocks, 1]

        # perform selection and add last stocks
        features = torch.index_select(
            features, dim=2, index=self.nodes_to_select
        )  # shape [N, 2 * feature_size, portfolio_size, 1]
        features = torch.cat([last_stocks, features], dim=1)

        # final convolution
        output = self.final_convolution(features)  # shape [N, 1, portfolio_size, 1]

        output = _prepend_cash_bias(cash_bias, output)  # [N, portfolio_size + 1]
        return self.softmax(output / self.softmax_temperature)

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array, singleton axes squeezed out).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        return _split_last_action(last_action)

    def _create_graph_batch(self, features, edge_index):
        """Create a batch of graphs with the features.

        Args:
            features: Tensor of shape [batch_size, feature_size, num_stocks, 1].
            edge_index: Graph connectivity in COO format.

        Returns:
            A batch of graphs with temporal features associated with each node.
        """
        graphs = [
            # [feature_size, num_stocks] -> [num_stocks, feature_size]
            Data(
                x=torch.transpose(sample[:, :, 0], 0, 1), edge_index=edge_index
            ).to(self.device)
            for sample in features
        ]
        return Batch.from_data_list(graphs)

    def _create_edge_type_for_batch(self, batch, edge_type):
        """Create the edge type tensor for a batch of graphs.

        Args:
            batch: Batch of graph data.
            edge_type: Original edge type tensor.

        Returns:
            Edge type tensor adapted for the batch: ``edge_type`` repeated
            once per graph (at least once), as a new detached tensor.
        """
        copies = max(int(batch.batch_size), 1)
        # A single concatenation keeps the cost linear in the batch size.
        return torch.cat([edge_type.detach()] * copies)