GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/agents/portfolio_optimization/architectures.py
from __future__ import annotations

import numpy as np
import torch
from torch import nn
from torch_geometric.data import Batch
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv
from torch_geometric.nn import Sequential
from torch_geometric.utils import to_dense_batch


class EIIE(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_size=3,
        conv_mid_features=2,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EIIE (ensemble of identical independent evaluators) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_size: Size of first convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.48550/arXiv.1706.10059.
        """
        super().__init__()
        self.device = device

        n_size = time_window - k_size + 1

        self.sequential = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_size),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_size),
            ),
            nn.ReLU(),
        )

        self.final_convolution = nn.Conv2d(
            in_channels=conv_final_features + 1, out_channels=1, kernel_size=(1, 1)
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

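    # With the default arguments, the two valid convolutions above collapse the time
    # axis completely: n_size = time_window - k_size + 1 = 50 - 3 + 1 = 48, so the
    # (1, k_size) kernel leaves 48 time steps and the (1, n_size) kernel reduces them
    # to a single one, yielding feature maps of shape
    # [N, conv_final_features, PORTFOLIO_SIZE, 1] in mu() below.
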
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        # the cash bias is replaced by zeros, so only its shape is reused below
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        output = self.sequential(observation)  # shape [N, conv_final_features, PORTFOLIO_SIZE, 1]
        output = torch.cat(
            [last_stocks, output], dim=1
        )  # shape [N, conv_final_features + 1, PORTFOLIO_SIZE, 1]
        output = self.final_convolution(output)  # shape [N, 1, PORTFOLIO_SIZE, 1]
        output = torch.cat(
            [cash_bias, output], dim=2
        )  # shape [N, 1, PORTFOLIO_SIZE + 1, 1]

        # Output shape must be [N, PORTFOLIO_SIZE + 1], where N is the batch size and
        # the last dimension holds the portfolio weight vector (cash first).
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, PORTFOLIO_SIZE + 1]

        output = self.softmax(output)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias


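# A minimal usage sketch (illustrative only, not part of the original module): it
# assumes a single-sample batch with the default 3 features, a hypothetical portfolio
# of 10 stocks and the default 50-step time window, and shows how the network maps the
# observation plus the previous weight vector to a new weight vector over cash + stocks.
def _example_eiie_usage():
    portfolio_size = 10  # hypothetical number of stocks
    policy = EIIE(initial_features=3, time_window=50)
    # observation shape: [batch, features, stocks, time_window]
    observation = np.random.rand(1, 3, portfolio_size, 50).astype(np.float32)
    # last action shape: [batch, stocks + 1], with the cash weight in position 0
    last_action = np.full(
        (1, portfolio_size + 1), 1 / (portfolio_size + 1), dtype=np.float32
    )
    weights = policy(observation, last_action)  # numpy array of shape (portfolio_size + 1,)
    assert np.isclose(weights.sum(), 1.0, atol=1e-5)  # softmax output sums to one
    return weights

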
class EI3(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EI3 (ensemble of identical independent inception) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1145/3357384.3357961.
        """
        super().__init__()
        self.device = device

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_short),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_short),
            ),
            nn.ReLU(),
        )

        self.mid_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_medium),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_medium),
            ),
            nn.ReLU(),
        )

        self.long_term = nn.Sequential(nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU())

        self.final_convolution = nn.Conv2d(
            in_channels=2 * conv_final_features + initial_features + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

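    # Channel bookkeeping for the final 1x1 convolution: the short- and mid-term
    # branches each output conv_final_features channels, the long-term branch is a
    # max-pool over time and therefore keeps the initial_features channels, and one
    # extra channel holds the previous portfolio weights (last_stocks). With the
    # default arguments this gives 2 * 20 + 3 + 1 = 44 input channels.
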
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        features = torch.cat(
            [last_stocks, short_features, medium_features, long_features], dim=1
        )
        output = self.final_convolution(features)
        output = torch.cat([cash_bias, output], dim=2)

        # Output shape must be [N, PORTFOLIO_SIZE + 1], where N is the batch size and
        # the last dimension holds the portfolio weight vector (cash first).
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, PORTFOLIO_SIZE + 1]

        output = self.softmax(output)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias


class GPM(nn.Module):
    def __init__(
        self,
        edge_index,
        edge_type,
        nodes_to_select,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        graph_layers=1,
        time_window=50,
        softmax_temperature=1,
        device="cpu",
    ):
        """GPM (Graph-based Portfolio Management) policy network initializer.

        Args:
            edge_index: Graph connectivity in COO format.
            edge_type: Type of each edge in edge_index.
            nodes_to_select: IDs of the nodes to be selected for the portfolio.
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            graph_layers: Number of graph neural network layers.
            time_window: Size of time window used as agent's state.
            softmax_temperature: Temperature parameter of the softmax function.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1016/j.neucom.2022.04.105.
        """
        super().__init__()
        self.device = device
        self.softmax_temperature = softmax_temperature

        num_relations = np.unique(edge_type).shape[0]

        if isinstance(edge_index, np.ndarray):
            edge_index = torch.from_numpy(edge_index)
        self.edge_index = edge_index.to(self.device).long()

        if isinstance(edge_type, np.ndarray):
            edge_type = torch.from_numpy(edge_type)
        self.edge_type = edge_type.to(self.device).long()

        if isinstance(nodes_to_select, np.ndarray):
            nodes_to_select = torch.from_numpy(nodes_to_select)
        elif isinstance(nodes_to_select, list):
            nodes_to_select = torch.tensor(nodes_to_select)
        self.nodes_to_select = nodes_to_select.to(self.device)

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_short),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_short),
            ),
            nn.ReLU(),
        )

        self.mid_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_medium),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_medium),
            ),
            nn.ReLU(),
        )

        self.long_term = nn.Sequential(nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU())

        feature_size = 2 * conv_final_features + initial_features

        graph_layers_list = []
        for i in range(graph_layers):
            graph_layers_list += [
                (
                    RGCNConv(feature_size, feature_size, num_relations),
                    "x, edge_index, edge_type -> x",
                ),
                nn.LeakyReLU(),
            ]

        self.gcn = Sequential("x, edge_index, edge_type", graph_layers_list)

        self.final_convolution = nn.Conv2d(
            in_channels=2 * feature_size + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

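    # Feature-size bookkeeping: each graph node carries the concatenated temporal
    # features (conv_final_features from the short branch + conv_final_features from
    # the mid branch + initial_features from the max-pooled long branch), i.e.
    # 2 * 20 + 3 = 43 channels by default. The final 1x1 convolution then sees those
    # temporal features, the same number of graph-convolved features and one channel
    # of previous weights, hence in_channels=2 * feature_size + 1 (87 by default).
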
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        temporal_features = torch.cat(
            [short_features, medium_features, long_features], dim=1
        )  # shape [N, feature_size, num_stocks, 1]

        # add features to graph
        graph_batch = self._create_graph_batch(temporal_features, self.edge_index)

        # set edge types for the batch
        edge_type = self._create_edge_type_for_batch(graph_batch, self.edge_type)

        # perform graph convolution
        graph_features = self.gcn(
            graph_batch.x, graph_batch.edge_index, edge_type
        )  # shape [N * num_stocks, feature_size]
        graph_features, _ = to_dense_batch(
            graph_features, graph_batch.batch
        )  # shape [N, num_stocks, feature_size]
        graph_features = torch.transpose(
            graph_features, 1, 2
        )  # shape [N, feature_size, num_stocks]
        graph_features = torch.unsqueeze(
            graph_features, 3
        )  # shape [N, feature_size, num_stocks, 1]
        graph_features = graph_features.to(self.device)

        # concatenate graph features and temporal features
        features = torch.cat(
            [temporal_features, graph_features], dim=1
        )  # shape [N, 2 * feature_size, num_stocks, 1]

        # select the portfolio nodes and add the last stock weights
        features = torch.index_select(
            features, dim=2, index=self.nodes_to_select
        )  # shape [N, 2 * feature_size, portfolio_size, 1]
        features = torch.cat([last_stocks, features], dim=1)

        # final convolution
        output = self.final_convolution(features)  # shape [N, 1, portfolio_size, 1]
        output = torch.cat(
            [cash_bias, output], dim=2
        )  # shape [N, 1, portfolio_size + 1, 1]

        # Output shape must be [N, portfolio_size + 1], where N is the batch size.
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, portfolio_size + 1]

        output = self.softmax(output / self.softmax_temperature)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias

    def _create_graph_batch(self, features, edge_index):
        """Create a batch of graphs with the features.

        Args:
            features: Tensor of shape [batch_size, feature_size, num_stocks, 1].
            edge_index: Graph connectivity in COO format.

        Returns:
            A batch of graphs with temporal features associated with each node.
        """
        batch_size = features.shape[0]
        graphs = []
        for i in range(batch_size):
            x = features[i, :, :, 0]  # shape [feature_size, num_stocks]
            x = torch.transpose(x, 0, 1)  # shape [num_stocks, feature_size]
            new_graph = Data(x=x, edge_index=edge_index).to(self.device)
            graphs.append(new_graph)
        return Batch.from_data_list(graphs)

    def _create_edge_type_for_batch(self, batch, edge_type):
        """Create the edge type tensor for a batch of graphs.

        Args:
            batch: Batch of graph data.
            edge_type: Original edge type tensor.

        Returns:
            Edge type tensor adapted for the batch.
        """
        # tile the edge types once per graph so they line up with the batched edge_index
        batch_edge_type = torch.clone(edge_type).detach()
        for i in range(1, batch.batch_size):
            batch_edge_type = torch.cat(
                [batch_edge_type, torch.clone(edge_type).detach()]
            )
        return batch_edge_type
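

# A minimal usage sketch (illustrative only, not part of the original module): it assumes
# a hypothetical market graph with 10 nodes, 2 relation types, a portfolio built from 5 of
# those nodes, and a torch_geometric installation compatible with the classes above. The
# shapes follow the docstrings and the mu() implementation of GPM.
def _example_gpm_usage():
    num_nodes = 10  # hypothetical number of graph nodes (all tradable assets)
    # ring-shaped connectivity in COO format, with an arbitrary relation type per edge
    edge_index = np.array(
        [list(range(num_nodes)), [(i + 1) % num_nodes for i in range(num_nodes)]]
    )
    edge_type = np.array([i % 2 for i in range(num_nodes)])
    nodes_to_select = [0, 2, 4, 6, 8]  # hypothetical portfolio of 5 assets
    policy = GPM(edge_index, edge_type, nodes_to_select, time_window=50)
    # observation covers every graph node: [batch, features, num_nodes, time_window]
    observation = np.random.rand(1, 3, num_nodes, 50).astype(np.float32)
    # last action covers cash + selected nodes only: [batch, len(nodes_to_select) + 1]
    last_action = np.full(
        (1, len(nodes_to_select) + 1), 1 / (len(nodes_to_select) + 1), dtype=np.float32
    )
    weights = policy(observation, last_action)  # numpy array of shape (6,)
    return weights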