Path: blob/master/labml_nn/helpers/device.py
import torch

from labml.configs import BaseConfigs, hyperparams, option


class DeviceInfo:
    def __init__(self, *,
                 use_cuda: bool,
                 cuda_device: int):
        self.use_cuda = use_cuda
        self.cuda_device = cuda_device
        self.cuda_count = torch.cuda.device_count()

        # Use CUDA only if it was requested and is actually available
        self.is_cuda = self.use_cuda and torch.cuda.is_available()
        if not self.is_cuda:
            self.device = torch.device('cpu')
        else:
            # If the requested index is out of range, fall back to the last available GPU
            if self.cuda_device < self.cuda_count:
                self.device = torch.device('cuda', self.cuda_device)
            else:
                self.device = torch.device('cuda', self.cuda_count - 1)

    def __str__(self):
        if not self.is_cuda:
            return "CPU"

        if self.cuda_device < self.cuda_count:
            return f"GPU:{self.cuda_device} - {torch.cuda.get_device_name(self.cuda_device)}"
        else:
            return (f"GPU:{self.cuda_count - 1}({self.cuda_device}) "
                    f"- {torch.cuda.get_device_name(self.cuda_count - 1)}")


class DeviceConfigs(BaseConfigs):
    r"""
    This is a configurable module to get a single device to train the model on.
    It picks up a CUDA device if one is available and falls back to CPU otherwise.

    It has other small advantages, such as being able to view the
    actual device name on the configurations view of the
    `labml app <https://github.com/labmlai/labml/tree/master/app>`_.

    Arguments:
        cuda_device (int): The CUDA device number. Defaults to ``0``.
        use_cuda (bool): Whether to use CUDA devices. Defaults to ``True``.
    """
    cuda_device: int = 0
    use_cuda: bool = True

    device_info: DeviceInfo

    device: torch.device

    def __init__(self):
        super().__init__(_primary='device')


@option(DeviceConfigs.device)
def _device(c: DeviceConfigs):
    return c.device_info.device


# Don't treat the device options as hyper-parameters in experiment logs
hyperparams(DeviceConfigs.cuda_device, DeviceConfigs.use_cuda,
            is_hyperparam=False)


@option(DeviceConfigs.device_info)
def _device_info(c: DeviceConfigs):
    return DeviceInfo(use_cuda=c.use_cuda,
                      cuda_device=c.cuda_device)
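
To make the behaviour concrete, here is a short usage sketch. The direct `DeviceInfo` construction follows straight from this file; the `Configs` aggregate and the `experiment.create`/`experiment.configs` calls are an assumption about the usual labml workflow, and the experiment name and model are illustrative, not part of this module.

import torch
import torch.nn as nn

from labml import experiment
from labml.configs import BaseConfigs
from labml_nn.helpers.device import DeviceConfigs, DeviceInfo

# Direct use of DeviceInfo: request GPU 0, fall back to CPU if CUDA is unavailable
info = DeviceInfo(use_cuda=True, cuda_device=0)
print(info)  # e.g. "GPU:0 - NVIDIA ..." or "CPU"


class Configs(BaseConfigs):
    # Because DeviceConfigs is constructed with `_primary='device'`, the aggregate
    # resolves to a `torch.device` when assigned to a config attribute like this.
    device: torch.device = DeviceConfigs()


if __name__ == '__main__':
    conf = Configs()
    experiment.create(name='device_example')
    experiment.configs(conf)
    # Place a model on the resolved device
    model = nn.Linear(16, 4).to(conf.device)
    print(conf.device)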