Path: blob/master/labml_nn/helpers/optimizer.py
from typing import Any, Tuple

import torch

from labml import tracker
from labml.configs import BaseConfigs, option, meta_config


class OptimizerConfigs(BaseConfigs):
    r"""
    This creates a configurable optimizer.

    Arguments:
        learning_rate (float): Learning rate of the optimizer. Defaults to ``0.01``.
        momentum (float): Momentum of the optimizer. Defaults to ``0.5``.
        parameters: Model parameters to optimize.
        d_model (int): Embedding size of the model (for the Noam optimizer).
        betas (Tuple[float, float]): Betas for the Adam optimizer. Defaults to ``(0.9, 0.999)``.
        eps (float): Epsilon for Adam/RMSProp optimizers. Defaults to ``1e-8``.
        step_factor (int): Step factor for the Noam optimizer. Defaults to ``1024``.

    There is a better implementation (with more options) in ``labml_nn``;
    `we recommend using that <https://nn.labml.ai/optimizers/configs.html>`_.
    """

    optimizer: torch.optim.Adam
    learning_rate: float = 0.01
    momentum: float = 0.5
    parameters: Any
    d_model: int
    betas: Tuple[float, float] = (0.9, 0.999)
    eps: float = 1e-8
    step_factor: int = 1024

    def __init__(self):
        super().__init__(_primary='optimizer')


meta_config(OptimizerConfigs.parameters)


@option(OptimizerConfigs.optimizer, 'SGD')
def sgd_optimizer(c: OptimizerConfigs):
    return torch.optim.SGD(c.parameters, lr=c.learning_rate, momentum=c.momentum)


@option(OptimizerConfigs.optimizer, 'Adam')
def adam_optimizer(c: OptimizerConfigs):
    return torch.optim.Adam(c.parameters, lr=c.learning_rate,
                            betas=c.betas, eps=c.eps)


class NoamOpt:
    """
    Learning-rate schedule from "Attention Is All You Need": the rate grows
    linearly for ``warmup`` steps and then decays with the inverse square root
    of the step, scaled by ``model_size ** -0.5`` and ``learning_rate``.
    """

    def __init__(self, model_size: int, learning_rate: float, warmup: int, step_factor: int, optimizer):
        self.step_factor = step_factor
        self.optimizer = optimizer
        self.warmup = warmup
        self.learning_rate = learning_rate
        self.model_size = model_size
        self._rate = 0

    def step(self):
        # Scale the global step by `step_factor`, update the learning rate of
        # every parameter group, then step the wrapped optimizer
        rate = self.rate(tracker.get_global_step() / self.step_factor)
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step):
        # lr = learning_rate * model_size^{-0.5} * min(step^{-0.5}, step * warmup^{-1.5})
        factor = self.model_size ** (-0.5) * min(step ** (-0.5), step * self.warmup ** (-1.5))
        return self.learning_rate * factor

    def zero_grad(self):
        self.optimizer.zero_grad()


@option(OptimizerConfigs.optimizer, 'Noam')
def noam_optimizer(c: OptimizerConfigs):
    optimizer = torch.optim.Adam(c.parameters, lr=0.0, betas=c.betas, eps=c.eps)
    return NoamOpt(c.d_model, 1, 2000, c.step_factor, optimizer)


def _test_noam_optimizer():
    import matplotlib.pyplot as plt
    import numpy as np

    # `rate()` only uses model_size, learning_rate and warmup, so the
    # step_factor and wrapped optimizer can be dummy values here
    opts = [NoamOpt(512, 1, 4000, 1, None),
            NoamOpt(512, 1, 8000, 1, None),
            NoamOpt(2048, 1, 2000, 1, None)]
    plt.plot(np.arange(1, 20000), [[opt.rate(i) for opt in opts] for i in range(1, 20000)])
    plt.legend(["512:4000", "512:8000", "2048:2000"])
    plt.title("Noam learning rate schedule (model size : warmup)")
    plt.show()


if __name__ == '__main__':
    _test_noam_optimizer()
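
# A minimal usage sketch, not part of the module API: it shows how an
# experiment's own `BaseConfigs` class can delegate optimizer creation to
# `OptimizerConfigs` via an `@option` calculator and switch between the
# registered options ('SGD', 'Adam', 'Noam').  The `_SketchConfigs` class,
# its `model`/`d_model` fields and the hyper-parameter values below are
# hypothetical, chosen only for illustration.
def _usage_sketch():
    class _SketchConfigs(BaseConfigs):
        model: torch.nn.Module
        d_model: int = 512
        optimizer: torch.optim.Adam

    @option(_SketchConfigs.optimizer)
    def _sketch_optimizer(c: _SketchConfigs):
        # Returning an `OptimizerConfigs` instance lets the labml config
        # system compute its primary config (`optimizer`) with these settings
        conf = OptimizerConfigs()
        conf.parameters = c.model.parameters()
        conf.optimizer = 'Noam'       # or 'SGD' / 'Adam'
        conf.d_model = c.d_model      # used only by the 'Noam' option
        conf.step_factor = 1024
        return conf

    return _SketchConfigs()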