GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/agents/portfolio_optimization/architectures.py
from __future__ import annotations

import numpy as np
import torch
from torch import nn
from torch_geometric.data import Batch
from torch_geometric.data import Data
from torch_geometric.nn import RGCNConv
from torch_geometric.nn import Sequential
from torch_geometric.utils import to_dense_batch


class EIIE(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_size=3,
        conv_mid_features=2,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EIIE (ensemble of identical independent evaluators) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_size: Size of first convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.48550/arXiv.1706.10059.
        """
        super().__init__()
        self.device = device

        n_size = time_window - k_size + 1

        self.sequential = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_size),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_size),
            ),
            nn.ReLU(),
        )

        self.final_convolution = nn.Conv2d(
            in_channels=conv_final_features + 1, out_channels=1, kernel_size=(1, 1)
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

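    # With the default arguments, the two valid convolutions above collapse the time
    # axis completely: n_size = time_window - k_size + 1 = 50 - 3 + 1 = 48, so the
    # (1, k_size) kernel leaves 48 time steps and the (1, n_size) kernel reduces them
    # to a single one, yielding feature maps of shape
    # [N, conv_final_features, PORTFOLIO_SIZE, 1] in mu() below.
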
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        # the cash bias is replaced by zeros, so only its shape is reused below
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        output = self.sequential(observation)  # shape [N, conv_final_features, PORTFOLIO_SIZE, 1]
        output = torch.cat(
            [last_stocks, output], dim=1
        )  # shape [N, conv_final_features + 1, PORTFOLIO_SIZE, 1]
        output = self.final_convolution(output)  # shape [N, 1, PORTFOLIO_SIZE, 1]
        output = torch.cat(
            [cash_bias, output], dim=2
        )  # shape [N, 1, PORTFOLIO_SIZE + 1, 1]

        # Output shape must be [N, PORTFOLIO_SIZE + 1], where N is the batch size and
        # the last dimension holds the portfolio weight vector (cash first).
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, PORTFOLIO_SIZE + 1]

        output = self.softmax(output)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias


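# A minimal usage sketch (illustrative only, not part of the original module): it
# assumes a single-sample batch with the default 3 features, a hypothetical portfolio
# of 10 stocks and the default 50-step time window, and shows how the network maps the
# observation plus the previous weight vector to a new weight vector over cash + stocks.
def _example_eiie_usage():
    portfolio_size = 10  # hypothetical number of stocks
    policy = EIIE(initial_features=3, time_window=50)
    # observation shape: [batch, features, stocks, time_window]
    observation = np.random.rand(1, 3, portfolio_size, 50).astype(np.float32)
    # last action shape: [batch, stocks + 1], with the cash weight in position 0
    last_action = np.full(
        (1, portfolio_size + 1), 1 / (portfolio_size + 1), dtype=np.float32
    )
    weights = policy(observation, last_action)  # numpy array of shape (portfolio_size + 1,)
    assert np.isclose(weights.sum(), 1.0, atol=1e-5)  # softmax output sums to one
    return weights

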
class EI3(nn.Module):
    def __init__(
        self,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        time_window=50,
        device="cpu",
    ):
        """EI3 (ensemble of identical independent inception) policy network
        initializer.

        Args:
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            time_window: Size of time window used as agent's state.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1145/3357384.3357961.
        """
        super().__init__()
        self.device = device

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_short),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_short),
            ),
            nn.ReLU(),
        )

        self.mid_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_medium),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_medium),
            ),
            nn.ReLU(),
        )

        self.long_term = nn.Sequential(nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU())

        self.final_convolution = nn.Conv2d(
            in_channels=2 * conv_final_features + initial_features + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

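    # Channel bookkeeping for the final 1x1 convolution: the short- and mid-term
    # branches each output conv_final_features channels, the long-term branch is a
    # max-pool over time and therefore keeps the initial_features channels, and one
    # extra channel holds the previous portfolio weights (last_stocks). With the
    # default arguments this gives 2 * 20 + 3 + 1 = 44 input channels.
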
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        features = torch.cat(
            [last_stocks, short_features, medium_features, long_features], dim=1
        )
        output = self.final_convolution(features)
        output = torch.cat([cash_bias, output], dim=2)

        # Output shape must be [N, PORTFOLIO_SIZE + 1], where N is the batch size and
        # the last dimension holds the portfolio weight vector (cash first).
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, PORTFOLIO_SIZE + 1]

        output = self.softmax(output)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias


class GPM(nn.Module):
    def __init__(
        self,
        edge_index,
        edge_type,
        nodes_to_select,
        initial_features=3,
        k_short=3,
        k_medium=21,
        conv_mid_features=3,
        conv_final_features=20,
        graph_layers=1,
        time_window=50,
        softmax_temperature=1,
        device="cpu",
    ):
        """GPM (Graph-based Portfolio Management) policy network initializer.

        Args:
            edge_index: Graph connectivity in COO format.
            edge_type: Type of each edge in edge_index.
            nodes_to_select: IDs of the nodes to be selected for the portfolio.
            initial_features: Number of input features.
            k_short: Size of short convolutional kernel.
            k_medium: Size of medium convolutional kernel.
            conv_mid_features: Size of intermediate convolutional channels.
            conv_final_features: Size of final convolutional channels.
            graph_layers: Number of graph neural network layers.
            time_window: Size of time window used as agent's state.
            softmax_temperature: Temperature parameter of the softmax function.
            device: Device in which the neural network will be run.

        Note:
            Reference article: https://doi.org/10.1016/j.neucom.2022.04.105.
        """
        super().__init__()
        self.device = device
        self.softmax_temperature = softmax_temperature

        num_relations = np.unique(edge_type).shape[0]

        if isinstance(edge_index, np.ndarray):
            edge_index = torch.from_numpy(edge_index)
        self.edge_index = edge_index.to(self.device).long()

        if isinstance(edge_type, np.ndarray):
            edge_type = torch.from_numpy(edge_type)
        self.edge_type = edge_type.to(self.device).long()

        if isinstance(nodes_to_select, np.ndarray):
            nodes_to_select = torch.from_numpy(nodes_to_select)
        elif isinstance(nodes_to_select, list):
            nodes_to_select = torch.tensor(nodes_to_select)
        self.nodes_to_select = nodes_to_select.to(self.device)

        n_short = time_window - k_short + 1
        n_medium = time_window - k_medium + 1
        n_long = time_window

        self.short_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_short),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_short),
            ),
            nn.ReLU(),
        )

        self.mid_term = nn.Sequential(
            nn.Conv2d(
                in_channels=initial_features,
                out_channels=conv_mid_features,
                kernel_size=(1, k_medium),
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=conv_mid_features,
                out_channels=conv_final_features,
                kernel_size=(1, n_medium),
            ),
            nn.ReLU(),
        )

        self.long_term = nn.Sequential(nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU())

        feature_size = 2 * conv_final_features + initial_features

        graph_layers_list = []
        for i in range(graph_layers):
            graph_layers_list += [
                (
                    RGCNConv(feature_size, feature_size, num_relations),
                    "x, edge_index, edge_type -> x",
                ),
                nn.LeakyReLU(),
            ]

        self.gcn = Sequential("x, edge_index, edge_type", graph_layers_list)

        self.final_convolution = nn.Conv2d(
            in_channels=2 * feature_size + 1,
            out_channels=1,
            kernel_size=(1, 1),
        )

        self.softmax = nn.Sequential(nn.Softmax(dim=-1))

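    # Feature-size bookkeeping: each graph node carries the concatenated temporal
    # features (conv_final_features from the short branch + conv_final_features from
    # the mid branch + initial_features from the max-pooled long branch), i.e.
    # 2 * 20 + 3 = 43 channels by default. The final 1x1 convolution then sees those
    # temporal features, the same number of graph-convolved features and one channel
    # of previous weights, hence in_channels=2 * feature_size + 1 (87 by default).
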
    def mu(self, observation, last_action):
        """Defines the most favorable action of this policy given the input
        observation.

        Args:
            observation: Environment observation.
            last_action: Last action performed by agent.

        Returns:
            Most favorable action.
        """

        if isinstance(observation, np.ndarray):
            observation = torch.from_numpy(observation)
        observation = observation.to(self.device).float()

        if isinstance(last_action, np.ndarray):
            last_action = torch.from_numpy(last_action)
        last_action = last_action.to(self.device).float()

        last_stocks, cash_bias = self._process_last_action(last_action)
        cash_bias = torch.zeros_like(cash_bias).to(self.device)

        short_features = self.short_term(observation)
        medium_features = self.mid_term(observation)
        long_features = self.long_term(observation)

        temporal_features = torch.cat(
            [short_features, medium_features, long_features], dim=1
        )  # shape [N, feature_size, num_stocks, 1]

        # add features to graph
        graph_batch = self._create_graph_batch(temporal_features, self.edge_index)

        # set edge types for the batch
        edge_type = self._create_edge_type_for_batch(graph_batch, self.edge_type)

        # perform graph convolution
        graph_features = self.gcn(
            graph_batch.x, graph_batch.edge_index, edge_type
        )  # shape [N * num_stocks, feature_size]
        graph_features, _ = to_dense_batch(
            graph_features, graph_batch.batch
        )  # shape [N, num_stocks, feature_size]
        graph_features = torch.transpose(
            graph_features, 1, 2
        )  # shape [N, feature_size, num_stocks]
        graph_features = torch.unsqueeze(
            graph_features, 3
        )  # shape [N, feature_size, num_stocks, 1]
        graph_features = graph_features.to(self.device)

        # concatenate graph features and temporal features
        features = torch.cat(
            [temporal_features, graph_features], dim=1
        )  # shape [N, 2 * feature_size, num_stocks, 1]

        # select the portfolio nodes and add the last stock weights
        features = torch.index_select(
            features, dim=2, index=self.nodes_to_select
        )  # shape [N, 2 * feature_size, portfolio_size, 1]
        features = torch.cat([last_stocks, features], dim=1)

        # final convolution
        output = self.final_convolution(features)  # shape [N, 1, portfolio_size, 1]
        output = torch.cat(
            [cash_bias, output], dim=2
        )  # shape [N, 1, portfolio_size + 1, 1]

        # Output shape must be [N, portfolio_size + 1], where N is the batch size.
        output = torch.squeeze(output, 3)
        output = torch.squeeze(output, 1)  # shape [N, portfolio_size + 1]

        output = self.softmax(output / self.softmax_temperature)

        return output

    def forward(self, observation, last_action):
        """Policy network's forward propagation.

        Args:
            observation: Environment observation (numpy array or tensor).
            last_action: Last action performed by the agent.

        Returns:
            Action to be taken (numpy array).
        """
        mu = self.mu(observation, last_action)
        action = mu.cpu().detach().numpy().squeeze()
        return action

    def _process_last_action(self, last_action):
        """Process the last action to retrieve cash bias and last stocks.

        Args:
            last_action: Last performed action.

        Returns:
            Last stocks and cash bias.
        """
        batch_size = last_action.shape[0]
        stocks = last_action.shape[1] - 1
        last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1))
        cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1))
        return last_stocks, cash_bias

    def _create_graph_batch(self, features, edge_index):
        """Create a batch of graphs with the features.

        Args:
            features: Tensor of shape [batch_size, feature_size, num_stocks, 1].
            edge_index: Graph connectivity in COO format.

        Returns:
            A batch of graphs with temporal features associated with each node.
        """
        batch_size = features.shape[0]
        graphs = []
        for i in range(batch_size):
            x = features[i, :, :, 0]  # shape [feature_size, num_stocks]
            x = torch.transpose(x, 0, 1)  # shape [num_stocks, feature_size]
            new_graph = Data(x=x, edge_index=edge_index).to(self.device)
            graphs.append(new_graph)
        return Batch.from_data_list(graphs)

    def _create_edge_type_for_batch(self, batch, edge_type):
        """Create the edge type tensor for a batch of graphs.

        Args:
            batch: Batch of graph data.
            edge_type: Original edge type tensor.

        Returns:
            Edge type tensor adapted for the batch.
        """
        # tile the edge types once per graph so they line up with the batched edge_index
        batch_edge_type = torch.clone(edge_type).detach()
        for i in range(1, batch.batch_size):
            batch_edge_type = torch.cat(
                [batch_edge_type, torch.clone(edge_type).detach()]
            )
        return batch_edge_type
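

# A minimal usage sketch (illustrative only, not part of the original module): it assumes
# a hypothetical market graph with 10 nodes, 2 relation types, a portfolio built from 5 of
# those nodes, and a torch_geometric installation compatible with the classes above. The
# shapes follow the docstrings and the mu() implementation of GPM.
def _example_gpm_usage():
    num_nodes = 10  # hypothetical number of graph nodes (all tradable assets)
    # ring-shaped connectivity in COO format, with an arbitrary relation type per edge
    edge_index = np.array(
        [list(range(num_nodes)), [(i + 1) % num_nodes for i in range(num_nodes)]]
    )
    edge_type = np.array([i % 2 for i in range(num_nodes)])
    nodes_to_select = [0, 2, 4, 6, 8]  # hypothetical portfolio of 5 assets
    policy = GPM(edge_index, edge_type, nodes_to_select, time_window=50)
    # observation covers every graph node: [batch, features, num_nodes, time_window]
    observation = np.random.rand(1, 3, num_nodes, 50).astype(np.float32)
    # last action covers cash + selected nodes only: [batch, len(nodes_to_select) + 1]
    last_action = np.full(
        (1, len(nodes_to_select) + 1), 1 / (len(nodes_to_select) + 1), dtype=np.float32
    )
    weights = policy(observation, last_action)  # numpy array of shape (6,)
    return weights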