GitHub Repository: AI4Finance-Foundation/FinRL
Path: blob/master/finrl/agents/stablebaselines3/hyperparams_opt.py
from __future__ import annotations

from typing import Any

import numpy as np
import optuna
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from torch import nn as nn
from utils import linear_schedule

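# `linear_schedule` above lives in a separate `utils` module that is not shown
# here. A minimal sketch of its assumed (rl-baselines3-zoo style) behaviour,
# kept under a different name so it does not shadow the real import:
def _linear_schedule_sketch(initial_value: float):
    """Return a schedule mapping progress_remaining (1 -> 0) to a value."""

    def schedule(progress_remaining: float) -> float:
        # progress_remaining decreases from 1 to 0 over the course of training
        return progress_remaining * initial_value

    return schedule
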
def sample_ppo_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for PPO hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for PPO.
    """
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512])
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    lr_schedule = "constant"
    # Uncomment to enable learning rate schedule
    # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    clip_range = trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3, 0.4])
    n_epochs = trial.suggest_categorical("n_epochs", [1, 5, 10, 20])
    gae_lambda = trial.suggest_categorical(
        "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]
    )
    max_grad_norm = trial.suggest_categorical(
        "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]
    )
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # Uncomment for gSDE (continuous actions)
    # log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    # Uncomment for gSDE (continuous actions)
    # sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256])
    # Orthogonal initialization
    ortho_init = False
    # ortho_init = trial.suggest_categorical('ortho_init', [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    # TODO: account for the number of parallel envs when clamping the batch size
    if batch_size > n_steps:
        batch_size = n_steps

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    # Independent networks usually work best
    # when not working with images
    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    activation_fn = {
        "tanh": nn.Tanh,
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU,
    }[activation_fn]

    return {
        "n_steps": n_steps,
        "batch_size": batch_size,
        "gamma": gamma,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "clip_range": clip_range,
        "n_epochs": n_epochs,
        "gae_lambda": gae_lambda,
        "max_grad_norm": max_grad_norm,
        "vf_coef": vf_coef,
        # "sde_sample_freq": sde_sample_freq,
        "policy_kwargs": dict(
            # log_std_init=log_std_init,
            net_arch=net_arch,
            activation_fn=activation_fn,
            ortho_init=ortho_init,
        ),
    }

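# A quick way to inspect a sampler's output without running a study:
# optuna.trial.FixedTrial replays the supplied values instead of sampling.
# This helper is an illustrative sketch, not part of the upstream file; the
# values below are arbitrary picks from the search spaces defined above.
def _example_ppo_fixed_trial() -> dict[str, Any]:
    fixed = optuna.trial.FixedTrial(
        {
            "batch_size": 64,
            "n_steps": 256,
            "gamma": 0.99,
            "learning_rate": 3e-4,
            "ent_coef": 1e-4,
            "clip_range": 0.2,
            "n_epochs": 10,
            "gae_lambda": 0.95,
            "max_grad_norm": 0.5,
            "vf_coef": 0.5,
            "net_arch": "small",
            "activation_fn": "tanh",
        }
    )
    # Returns ready-to-use PPO keyword arguments
    return sample_ppo_params(fixed)
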
def sample_trpo_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for TRPO hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for TRPO.
    """
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32, 64, 128, 256, 512])
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    lr_schedule = "constant"
    # Uncomment to enable learning rate schedule
    # lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    # line_search_shrinking_factor = trial.suggest_categorical("line_search_shrinking_factor", [0.6, 0.7, 0.8, 0.9])
    n_critic_updates = trial.suggest_categorical(
        "n_critic_updates", [5, 10, 20, 25, 30]
    )
    cg_max_steps = trial.suggest_categorical("cg_max_steps", [5, 10, 20, 25, 30])
    # cg_damping = trial.suggest_categorical("cg_damping", [0.5, 0.2, 0.1, 0.05, 0.01])
    target_kl = trial.suggest_categorical(
        "target_kl", [0.1, 0.05, 0.03, 0.02, 0.01, 0.005, 0.001]
    )
    gae_lambda = trial.suggest_categorical(
        "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]
    )
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # Uncomment for gSDE (continuous actions)
    # log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    # Uncomment for gSDE (continuous actions)
    # sde_sample_freq = trial.suggest_categorical("sde_sample_freq", [-1, 8, 16, 32, 64, 128, 256])
    # Orthogonal initialization
    ortho_init = False
    # ortho_init = trial.suggest_categorical('ortho_init', [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    # TODO: account for the number of parallel envs when clamping the batch size
    if batch_size > n_steps:
        batch_size = n_steps

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    # Independent networks usually work best
    # when not working with images
    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    activation_fn = {
        "tanh": nn.Tanh,
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU,
    }[activation_fn]

    return {
        "n_steps": n_steps,
        "batch_size": batch_size,
        "gamma": gamma,
        # "cg_damping": cg_damping,
        "cg_max_steps": cg_max_steps,
        # "line_search_shrinking_factor": line_search_shrinking_factor,
        "n_critic_updates": n_critic_updates,
        "target_kl": target_kl,
        "learning_rate": learning_rate,
        "gae_lambda": gae_lambda,
        # "sde_sample_freq": sde_sample_freq,
        "policy_kwargs": dict(
            # log_std_init=log_std_init,
            net_arch=net_arch,
            activation_fn=activation_fn,
            ortho_init=ortho_init,
        ),
    }

def sample_a2c_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for A2C hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for A2C.
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    normalize_advantage = trial.suggest_categorical(
        "normalize_advantage", [False, True]
    )
    max_grad_norm = trial.suggest_categorical(
        "max_grad_norm", [0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]
    )
    # Toggle PyTorch RMSprop (differs from the TF implementation, cf. SB3 docs)
    use_rms_prop = trial.suggest_categorical("use_rms_prop", [False, True])
    gae_lambda = trial.suggest_categorical(
        "gae_lambda", [0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]
    )
    n_steps = trial.suggest_categorical(
        "n_steps", [8, 16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    lr_schedule = trial.suggest_categorical("lr_schedule", ["linear", "constant"])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    ent_coef = trial.suggest_loguniform("ent_coef", 0.00000001, 0.1)
    vf_coef = trial.suggest_uniform("vf_coef", 0, 1)
    # Uncomment for gSDE (continuous actions)
    # log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    ortho_init = trial.suggest_categorical("ortho_init", [False, True])
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium"])
    # sde_net_arch = trial.suggest_categorical("sde_net_arch", [None, "tiny", "small"])
    # full_std = trial.suggest_categorical("full_std", [False, True])
    # activation_fn = trial.suggest_categorical('activation_fn', ['tanh', 'relu', 'elu', 'leaky_relu'])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])

    if lr_schedule == "linear":
        learning_rate = linear_schedule(learning_rate)

    net_arch = {
        "small": [dict(pi=[64, 64], vf=[64, 64])],
        "medium": [dict(pi=[256, 256], vf=[256, 256])],
    }[net_arch]

    # sde_net_arch = {
    #     None: None,
    #     "tiny": [64],
    #     "small": [64, 64],
    # }[sde_net_arch]

    activation_fn = {
        "tanh": nn.Tanh,
        "relu": nn.ReLU,
        "elu": nn.ELU,
        "leaky_relu": nn.LeakyReLU,
    }[activation_fn]

    return {
        "n_steps": n_steps,
        "gamma": gamma,
        "gae_lambda": gae_lambda,
        "learning_rate": learning_rate,
        "ent_coef": ent_coef,
        "normalize_advantage": normalize_advantage,
        "max_grad_norm": max_grad_norm,
        "use_rms_prop": use_rms_prop,
        "vf_coef": vf_coef,
        "policy_kwargs": dict(
            # log_std_init=log_std_init,
            net_arch=net_arch,
            # full_std=full_std,
            activation_fn=activation_fn,
            # sde_net_arch=sde_net_arch,
            ortho_init=ortho_init,
        ),
    }

def sample_sac_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for SAC hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for SAC.
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical(
        "batch_size", [16, 32, 64, 128, 256, 512, 1024, 2048]
    )
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(1e5), int(1e6)]
    )
    learning_starts = trial.suggest_categorical(
        "learning_starts", [0, 1000, 10000, 20000]
    )
    # train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    train_freq = trial.suggest_categorical(
        "train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512]
    )
    # Polyak coefficient
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])
    # Tuning gradient_steps separately takes too much time,
    # so tie it to the training frequency instead
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    # ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    ent_coef = "auto"
    # You can comment this out when not using gSDE
    log_std_init = trial.suggest_uniform("log_std_init", -4, 1)
    # NOTE: Add "verybig" to net_arch when tuning HER
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
        # Uncomment for tuning HER
        # "large": [256, 256, 256],
        # "verybig": [512, 512, 512],
    }[net_arch]

    target_entropy = "auto"
    # if ent_coef == 'auto':
    #     # target_entropy = trial.suggest_categorical('target_entropy', ['auto', 5, 1, 0, -1, -5, -10, -20, -50])
    #     target_entropy = trial.suggest_uniform('target_entropy', -10, 10)

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "learning_starts": learning_starts,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "ent_coef": ent_coef,
        "tau": tau,
        "target_entropy": target_entropy,
        "policy_kwargs": dict(log_std_init=log_std_init, net_arch=net_arch),
    }

    # NOTE: `using_her_replay_buffer` is not a standard optuna.Trial attribute;
    # the tuning harness is expected to attach it to the trial beforehand.
    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams

def sample_td3_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for TD3 hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for TD3.
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical(
        "batch_size", [16, 32, 64, 100, 128, 256, 512, 1024, 2048]
    )
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(1e5), int(1e6)]
    )
    # Polyak coefficient
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])

    train_freq = trial.suggest_categorical(
        "train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512]
    )
    gradient_steps = train_freq

    noise_type = trial.suggest_categorical(
        "noise_type", ["ornstein-uhlenbeck", "normal", None]
    )
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    # NOTE: Add "verybig" to net_arch when tuning HER
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {
        "small": [64, 64],
        "medium": [256, 256],
        "big": [400, 300],
        # Uncomment for tuning HER
        # "verybig": [256, 256, 256],
    }[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "policy_kwargs": dict(net_arch=net_arch),
        "tau": tau,
    }

    # NOTE: `n_actions` is attached to the trial by the tuning harness;
    # it is not a standard optuna.Trial attribute.
    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams

def sample_ddpg_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for DDPG hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for DDPG.
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical(
        "batch_size", [16, 32, 64, 100, 128, 256, 512, 1024, 2048]
    )
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(1e5), int(1e6)]
    )
    # Polyak coefficient
    tau = trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02, 0.05, 0.08])

    train_freq = trial.suggest_categorical(
        "train_freq", [1, 4, 8, 16, 32, 64, 128, 256, 512]
    )
    gradient_steps = train_freq

    noise_type = trial.suggest_categorical(
        "noise_type", ["ornstein-uhlenbeck", "normal", None]
    )
    noise_std = trial.suggest_uniform("noise_std", 0, 1)

    # NOTE: Add "verybig" to net_arch when tuning HER (see TD3)
    net_arch = trial.suggest_categorical("net_arch", ["small", "medium", "big"])
    # activation_fn = trial.suggest_categorical('activation_fn', [nn.Tanh, nn.ReLU, nn.ELU, nn.LeakyReLU])

    net_arch = {"small": [64, 64], "medium": [256, 256], "big": [400, 300]}[net_arch]

    hyperparams = {
        "gamma": gamma,
        "tau": tau,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if noise_type == "normal":
        hyperparams["action_noise"] = NormalActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )
    elif noise_type == "ornstein-uhlenbeck":
        hyperparams["action_noise"] = OrnsteinUhlenbeckActionNoise(
            mean=np.zeros(trial.n_actions), sigma=noise_std * np.ones(trial.n_actions)
        )

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams

def sample_dqn_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for DQN hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for DQN.
    """
    gamma = trial.suggest_categorical(
        "gamma", [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]
    )
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    batch_size = trial.suggest_categorical(
        "batch_size", [16, 32, 64, 100, 128, 256, 512]
    )
    buffer_size = trial.suggest_categorical(
        "buffer_size", [int(1e4), int(5e4), int(1e5), int(1e6)]
    )
    exploration_final_eps = trial.suggest_uniform("exploration_final_eps", 0, 0.2)
    exploration_fraction = trial.suggest_uniform("exploration_fraction", 0, 0.5)
    target_update_interval = trial.suggest_categorical(
        "target_update_interval", [1, 1000, 5000, 10000, 15000, 20000]
    )
    learning_starts = trial.suggest_categorical(
        "learning_starts", [0, 1000, 5000, 10000, 20000]
    )

    train_freq = trial.suggest_categorical("train_freq", [1, 4, 8, 16, 128, 256, 1000])
    subsample_steps = trial.suggest_categorical("subsample_steps", [1, 2, 4, 8])
    gradient_steps = max(train_freq // subsample_steps, 1)

    net_arch = trial.suggest_categorical("net_arch", ["tiny", "small", "medium"])

    net_arch = {"tiny": [64], "small": [64, 64], "medium": [256, 256]}[net_arch]

    hyperparams = {
        "gamma": gamma,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "buffer_size": buffer_size,
        "train_freq": train_freq,
        "gradient_steps": gradient_steps,
        "exploration_fraction": exploration_fraction,
        "exploration_final_eps": exploration_final_eps,
        "target_update_interval": target_update_interval,
        "learning_starts": learning_starts,
        "policy_kwargs": dict(net_arch=net_arch),
    }

    if trial.using_her_replay_buffer:
        hyperparams = sample_her_params(trial, hyperparams)

    return hyperparams

def sample_her_params(
    trial: optuna.Trial, hyperparams: dict[str, Any]
) -> dict[str, Any]:
    """
    Sampler for HerReplayBuffer hyperparams.

    :param trial: Optuna trial object
    :param hyperparams: Hyperparameters already sampled for the base algorithm
    :return: Hyperparameters with HER replay-buffer kwargs added.
    """
    # NOTE: `her_kwargs` is attached to the trial by the tuning harness;
    # it is not a standard optuna.Trial attribute.
    her_kwargs = trial.her_kwargs.copy()
    her_kwargs["n_sampled_goal"] = trial.suggest_int("n_sampled_goal", 1, 5)
    her_kwargs["goal_selection_strategy"] = trial.suggest_categorical(
        "goal_selection_strategy", ["final", "episode", "future"]
    )
    her_kwargs["online_sampling"] = trial.suggest_categorical(
        "online_sampling", [True, False]
    )
    hyperparams["replay_buffer_kwargs"] = her_kwargs
    return hyperparams

def sample_tqc_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for TQC hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for TQC.
    """
    # TQC is SAC + Distributional RL
    hyperparams = sample_sac_params(trial)

    n_quantiles = trial.suggest_int("n_quantiles", 5, 50)
    top_quantiles_to_drop_per_net = trial.suggest_int(
        "top_quantiles_to_drop_per_net", 0, n_quantiles - 1
    )

    hyperparams["policy_kwargs"].update({"n_quantiles": n_quantiles})
    hyperparams["top_quantiles_to_drop_per_net"] = top_quantiles_to_drop_per_net

    return hyperparams

def sample_qrdqn_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for QR-DQN hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for QR-DQN.
    """
    # QR-DQN is DQN + Distributional RL
    hyperparams = sample_dqn_params(trial)

    n_quantiles = trial.suggest_int("n_quantiles", 5, 200)
    hyperparams["policy_kwargs"].update({"n_quantiles": n_quantiles})

    return hyperparams

def sample_ars_params(trial: optuna.Trial) -> dict[str, Any]:
    """
    Sampler for ARS hyperparams.

    :param trial: Optuna trial object
    :return: Sampled hyperparameters for ARS.
    """
    # n_eval_episodes = trial.suggest_categorical("n_eval_episodes", [1, 2])
    n_delta = trial.suggest_categorical("n_delta", [4, 8, 6, 32, 64])
    # learning_rate = trial.suggest_categorical("learning_rate", [0.01, 0.02, 0.025, 0.03])
    learning_rate = trial.suggest_loguniform("learning_rate", 1e-5, 1)
    delta_std = trial.suggest_categorical(
        "delta_std", [0.01, 0.02, 0.025, 0.03, 0.05, 0.1, 0.2, 0.3]
    )
    top_frac_size = trial.suggest_categorical(
        "top_frac_size", [0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
    )
    zero_policy = trial.suggest_categorical("zero_policy", [True, False])
    n_top = max(int(top_frac_size * n_delta), 1)

    # net_arch = trial.suggest_categorical("net_arch", ["linear", "tiny", "small"])

    # Note: remove the bias to match the original linear policy,
    # and do not squash the output.
    # Comment out when doing a hyperparams search with the linear policy only.
    # net_arch = {
    #     "linear": [],
    #     "tiny": [16],
    #     "small": [32],
    # }[net_arch]

    # TODO: optimize the alive_bonus_offset too

    return {
        # "n_eval_episodes": n_eval_episodes,
        "n_delta": n_delta,
        "learning_rate": learning_rate,
        "delta_std": delta_std,
        "n_top": n_top,
        "zero_policy": zero_policy,
        # "policy_kwargs": dict(net_arch=net_arch),
    }

HYPERPARAMS_SAMPLER = {
    "a2c": sample_a2c_params,
    "ars": sample_ars_params,
    "ddpg": sample_ddpg_params,
    "dqn": sample_dqn_params,
    "qrdqn": sample_qrdqn_params,
    "sac": sample_sac_params,
    "tqc": sample_tqc_params,
    "ppo": sample_ppo_params,
    "td3": sample_td3_params,
    "trpo": sample_trpo_params,
}

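# A minimal sketch (not part of the upstream file) of how these samplers are
# typically wired into an Optuna objective. `PPO`, `evaluate_policy`, and the
# "CartPole-v1" environment are illustrative assumptions; substitute your own
# agent, environment, and training/evaluation budget. Note that the off-policy
# samplers additionally expect `trial.n_actions` and
# `trial.using_her_replay_buffer` to be attached by the caller.
def _example_objective(trial: optuna.Trial) -> float:
    from stable_baselines3 import PPO
    from stable_baselines3.common.evaluation import evaluate_policy

    # Draw a full set of PPO hyperparameters for this trial
    params = HYPERPARAMS_SAMPLER["ppo"](trial)
    model = PPO("MlpPolicy", "CartPole-v1", verbose=0, **params)
    model.learn(total_timesteps=10_000)
    # Score the trial by mean evaluation reward
    mean_reward, _ = evaluate_policy(model, model.get_env(), n_eval_episodes=5)
    return mean_reward


# Usage sketch:
# optuna.create_study(direction="maximize").optimize(_example_objective, n_trials=20)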