Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
rasbt
GitHub Repository: rasbt/machine-learning-book
Path: blob/main/ch13/ch13_part3_lightning.ipynb
1245 views
Kernel: Python 3 (ipykernel)

Machine Learning with PyTorch and Scikit-Learn

-- Code Examples

Package version checks

Add the parent folder to the path in order to load the python_environment_check.py script:

import sys

# Put the parent directory first on the module search path so that modules
# located one level above this notebook can be imported.
sys.path.insert(0, '..')

Check recommended package versions:

from python_environment_check import check_packages

# Recommended version for each package used in this notebook.
d = {
    'torch': '1.8',
    'torchvision': '0.9.0',
    'tensorboard': '2.7.0',
    'pytorch_lightning': '1.5.0',
    'torchmetrics': '0.6.2'
}
# Prints one status line per package (see the "[OK] ..." cell output).
check_packages(d)
[OK] Your Python version is 3.8.12 | packaged by conda-forge | (default, Oct 12 2021, 21:59:51) [GCC 9.4.0] [OK] torch 1.10.1+cu102 [OK] torchvision 0.11.2+cu102 [OK] tensorboard 2.7.0 [OK] pytorch_lightning 1.5.1 [OK] torchmetrics 0.6.2

Chapter 13: Going Deeper -- the Mechanics of PyTorch (Part 3/3)

Higher-level PyTorch APIs: a short introduction to PyTorch Lightning

Setting up the PyTorch Lightning model

Higher-level PyTorch APIs: a short introduction to PyTorch Lightning

Setting up the PyTorch Lightning model

import pytorch_lightning as pl import torch import torch.nn as nn from torchmetrics import __version__ as torchmetrics_version from pkg_resources import parse_version from torchmetrics import Accuracy
class MultiLayerPerceptron(pl.LightningModule):
    """Multilayer perceptron classifier implemented as a LightningModule.

    The network flattens its input and passes it through one
    ``Linear -> ReLU`` pair per entry of ``hidden_units``, followed by a
    final ``Linear`` layer that produces ``num_classes`` logits.

    Args:
        image_shape: Three-element shape of a single input sample; the
            product of its entries is the size of the flattened input.
        hidden_units: Width of each hidden layer, in order. May be empty,
            in which case the model is a single linear layer.
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, image_shape=(1, 28, 28), hidden_units=(32, 16),
                 num_classes=10):
        super().__init__()

        # new PL attributes:
        # torchmetrics releases newer than 0.8 require the task type and the
        # number of classes; older releases take no arguments.
        if parse_version(torchmetrics_version) > parse_version("0.8"):
            metric_kwargs = {"task": "multiclass", "num_classes": num_classes}
        else:
            metric_kwargs = {}
        self.train_acc = Accuracy(**metric_kwargs)
        self.valid_acc = Accuracy(**metric_kwargs)
        self.test_acc = Accuracy(**metric_kwargs)

        # Model similar to previous section:
        input_size = image_shape[0] * image_shape[1] * image_shape[2]
        all_layers = [nn.Flatten()]
        for hidden_unit in hidden_units:
            all_layers.append(nn.Linear(input_size, hidden_unit))
            all_layers.append(nn.ReLU())
            input_size = hidden_unit

        # `input_size` now holds the width of the last hidden layer, or the
        # flattened input size when `hidden_units` is empty.
        all_layers.append(nn.Linear(input_size, num_classes))
        self.model = nn.Sequential(*all_layers)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        return self.model(x)

    def _log_and_reset(self, name, metric, **log_kwargs):
        """Log the accumulated value of ``metric`` under ``name``, then reset it."""
        self.log(name, metric.compute(), **log_kwargs)
        metric.reset()

    def training_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one training batch.

        Also accumulates the training accuracy and logs ``train_loss``.
        """
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one validation batch.

        Also accumulates the validation accuracy, which the epoch-end hook
        logs as ``valid_acc``, and logs ``valid_loss``.
        """
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.valid_acc.update(preds, y)
        self.log("valid_loss", loss, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute the cross-entropy loss for one test batch.

        Accuracy is only accumulated here and is logged once per epoch by
        the epoch-end hook, so ``test_acc`` is computed over the whole test
        set instead of being derived from per-batch running values.
        """
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.test_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        return loss

    # Conditionally define epoch end methods based on PyTorch Lightning version
    if parse_version(pl.__version__) >= parse_version("2.0"):
        # For PyTorch Lightning 2.0 and above
        def on_train_epoch_end(self):
            """Log and reset the training accuracy at the end of an epoch."""
            self._log_and_reset("train_acc", self.train_acc)

        # Earlier revisions of this class used this (non-hook) name; keep it
        # as an alias so any explicit caller continues to work.
        on_training_epoch_end = on_train_epoch_end

        def on_validation_epoch_end(self):
            """Log and reset the validation accuracy at the end of an epoch."""
            self._log_and_reset("valid_acc", self.valid_acc)

        def on_test_epoch_end(self):
            """Log and reset the test accuracy at the end of an epoch."""
            self._log_and_reset("test_acc", self.test_acc, prog_bar=True)

    else:
        # For PyTorch Lightning < 2.0
        def training_epoch_end(self, outs):
            """Log and reset the training accuracy at the end of an epoch."""
            self._log_and_reset("train_acc", self.train_acc)

        def validation_epoch_end(self, outs):
            """Log and reset the validation accuracy at the end of an epoch."""
            self._log_and_reset("valid_acc", self.valid_acc)

        def test_epoch_end(self, outs):
            """Log and reset the test accuracy at the end of an epoch."""
            self._log_and_reset("test_acc", self.test_acc, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (learning rate 0.001)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer

Setting up the data loaders

from torch.utils.data import DataLoader from torch.utils.data import random_split from torchvision.datasets import MNIST from torchvision import transforms
class MnistDataModule(pl.LightningDataModule):
    """LightningDataModule providing MNIST train/validation/test loaders.

    Args:
        data_path: Root directory passed to ``torchvision.datasets.MNIST``.
        batch_size: Batch size used by all three data loaders.
        num_workers: Number of worker processes used by each data loader.
    """

    def __init__(self, data_path='./', batch_size=64, num_workers=4):
        super().__init__()
        self.data_path = data_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.transform = transforms.Compose([transforms.ToTensor()])

    def prepare_data(self):
        """Download MNIST into ``data_path`` (``download=True``)."""
        MNIST(root=self.data_path, download=True)

    def setup(self, stage=None):
        """Create the train/validation split and the test dataset.

        ``stage`` is accepted for API compatibility but is not used: all
        three datasets are created on every call.
        """
        # stage is either 'fit', 'validate', 'test', or 'predict'
        # here not relevant
        mnist_all = MNIST(
            root=self.data_path,
            train=True,
            transform=self.transform,
            download=False
        )

        # Fixed generator seed so the 55000/5000 split is the same on every run.
        self.train, self.val = random_split(
            mnist_all, [55000, 5000], generator=torch.Generator().manual_seed(1)
        )

        self.test = MNIST(
            root=self.data_path,
            train=False,
            transform=self.transform,
            download=False
        )

    def _make_loader(self, dataset):
        """Build a DataLoader for ``dataset`` with the configured settings."""
        return DataLoader(
            dataset, batch_size=self.batch_size, num_workers=self.num_workers
        )

    def train_dataloader(self):
        """Return the data loader over the training split."""
        return self._make_loader(self.train)

    def val_dataloader(self):
        """Return the data loader over the validation split."""
        return self._make_loader(self.val)

    def test_dataloader(self):
        """Return the data loader over the test dataset."""
        return self._make_loader(self.test)


torch.manual_seed(1)
mnist_dm = MnistDataModule()

Training the model using the PyTorch Lightning Trainer class

from pytorch_lightning.callbacks import ModelCheckpoint


mnistclassifier = MultiLayerPerceptron()

# Keep only the single checkpoint with the highest logged "valid_acc".
callbacks = [ModelCheckpoint(save_top_k=1, mode='max', monitor="valid_acc")]  # save top 1 model

if torch.cuda.is_available():  # if you have GPUs
    # `accelerator`/`devices` replace the `gpus=` argument, which newer
    # Lightning releases no longer accept.
    trainer = pl.Trainer(
        max_epochs=10, callbacks=callbacks, accelerator="gpu", devices=1
    )
else:
    trainer = pl.Trainer(max_epochs=10, callbacks=callbacks)

trainer.fit(model=mnistclassifier, datamodule=mnist_dm)
GPU available: True, used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] 2022-02-21 00:05:52.412727: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0 | Name | Type | Params ----------------------------------------- 0 | train_acc | Accuracy | 0 1 | valid_acc | Accuracy | 0 2 | test_acc | Accuracy | 0 3 | model | Sequential | 25.8 K ----------------------------------------- 25.8 K Trainable params 0 Non-trainable params 25.8 K Total params 0.103 Total estimated model params size (MB)
Validation sanity check: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]

Evaluating the model using TensorBoard

# Evaluate on the test set using the weights restored from the best
# checkpoint (ckpt_path='best') rather than the weights currently in memory.
trainer.test(model=mnistclassifier, datamodule=mnist_dm, ckpt_path='best')
Restoring states from the checkpoint path at /home/jovyan/ch13/lightning_logs/version_0/checkpoints/epoch=8-step=7739.ckpt LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] Loaded model weights from checkpoint at /home/jovyan/ch13/lightning_logs/version_0/checkpoints/epoch=8-step=7739.ckpt
Testing: 0it [00:00, ?it/s]
-------------------------------------------------------------------------------- DATALOADER:0 TEST RESULTS {'test_acc': 0.9499600529670715, 'test_loss': 0.14912301301956177} --------------------------------------------------------------------------------
[{'test_loss': 0.14912301301956177, 'test_acc': 0.9499600529670715}]
from IPython.display import Image

# Render the figure stored at figures/13_09.png inline in the notebook.
Image(filename='figures/13_09.png')
Image in a Jupyter notebook
# Start tensorboard %load_ext tensorboard %tensorboard --logdir lightning_logs/
# Render the figure stored at figures/13_10.png inline in the notebook.
Image(filename='figures/13_10.png')
Image in a Jupyter notebook
# Continue training from the best checkpoint written by the previous run,
# e.g. 'lightning_logs/version_0/checkpoints/epoch=8-step=7739.ckpt'.
# Asking the checkpoint callback for the path avoids hard-coding a file name
# that contains run-specific version, epoch and step numbers.
path = trainer.checkpoint_callback.best_model_path

if torch.cuda.is_available():  # if you have GPUs
    trainer = pl.Trainer(
        max_epochs=15, callbacks=callbacks, accelerator="gpu", devices=1
    )
else:
    trainer = pl.Trainer(max_epochs=15, callbacks=callbacks)

# `Trainer(resume_from_checkpoint=...)` is deprecated; the checkpoint to
# resume from is passed to `fit` via `ckpt_path` instead.
trainer.fit(model=mnistclassifier, datamodule=mnist_dm, ckpt_path=path)
/home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/checkpoint_connector.py:45: LightningDeprecationWarning: Setting `Trainer(resume_from_checkpoint=)` is deprecated in v1.5 and will be removed in v1.7. Please pass `Trainer.fit(ckpt_path=)` directly instead. rank_zero_deprecation( Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback. GPU available: True, used: True TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs /home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py:1893: LightningDeprecationWarning: `trainer.resume_from_checkpoint` is deprecated in v1.5 and will be removed in v1.7. Specify the fit checkpoint path with `trainer.fit(ckpt_path=)` instead. rank_zero_deprecation( /home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py:469: LightningDeprecationWarning: DataModule.setup has already been called, so it will not be called again. In v1.6 this behavior will change to always call DataModule.setup. rank_zero_deprecation( Restoring states from the checkpoint path at lightning_logs/version_0/checkpoints/epoch=8-step=7739.ckpt LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] /home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/checkpoint_connector.py:247: UserWarning: You're resuming from a checkpoint that ended mid-epoch. Training will start from the beginning of the next epoch. This can cause unreliable results if further training is done, consider using an end of epoch checkpoint. 
rank_zero_warn( Restored all states from the checkpoint file at lightning_logs/version_0/checkpoints/epoch=8-step=7739.ckpt | Name | Type | Params ----------------------------------------- 0 | train_acc | Accuracy | 0 1 | valid_acc | Accuracy | 0 2 | test_acc | Accuracy | 0 3 | model | Sequential | 25.8 K ----------------------------------------- 25.8 K Trainable params 0 Non-trainable params 25.8 K Total params 0.103 Total estimated model params size (MB) /home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:617: UserWarning: Checkpoint directory /home/jovyan/ch13/lightning_logs/version_0/checkpoints exists and is not empty. rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
Validation sanity check: 0it [00:00, ?it/s]
Training: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
Validating: 0it [00:00, ?it/s]
/home/jovyan/conda/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py:469: LightningDeprecationWarning: DataModule.teardown has already been called, so it will not be called again. In v1.6 this behavior will change to always call DataModule.teardown. rank_zero_deprecation(
from IPython.display import Image

# Render the figure stored at figures/13_11.png inline in the notebook.
Image(filename='figures/13_11.png')
Image in a Jupyter notebook
%tensorboard --logdir lightning_logs/
Reusing TensorBoard on port 6006 (pid 702), started 0:02:27 ago. (Use '!kill 702' to kill it.)
# Evaluate on the test set without specifying a checkpoint (no ckpt_path),
# i.e. with the model as it is after the additional training epochs.
trainer.test(model=mnistclassifier, datamodule=mnist_dm)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Testing: 0it [00:00, ?it/s]
-------------------------------------------------------------------------------- DATALOADER:0 TEST RESULTS {'test_acc': 0.9542893767356873, 'test_loss': 0.14718961715698242} --------------------------------------------------------------------------------
[{'test_loss': 0.14718961715698242, 'test_acc': 0.9542893767356873}]
# Evaluate on the test set again, this time restoring the best checkpoint
# (ckpt_path='best') before testing.
trainer.test(model=mnistclassifier, datamodule=mnist_dm, ckpt_path='best')
Restoring states from the checkpoint path at /home/jovyan/ch13/lightning_logs/version_0/checkpoints/epoch=13-step=12039.ckpt LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0] Loaded model weights from checkpoint at /home/jovyan/ch13/lightning_logs/version_0/checkpoints/epoch=13-step=12039.ckpt
Testing: 0it [00:00, ?it/s]
-------------------------------------------------------------------------------- DATALOADER:0 TEST RESULTS {'test_acc': 0.9559454321861267, 'test_loss': 0.14512717723846436} --------------------------------------------------------------------------------
[{'test_loss': 0.14512717723846436, 'test_acc': 0.9559454321861267}]
# Re-create the model from the best checkpoint on disk, e.g.
# "lightning_logs/version_0/checkpoints/epoch=13-step=12039.ckpt".
# The path is taken from the checkpoint callback because the file name
# contains run-specific version, epoch and step numbers.
path = trainer.checkpoint_callback.best_model_path
model = MultiLayerPerceptron.load_from_checkpoint(path)

Summary


Readers may ignore the next cell.

! python ../.convert_notebook_to_script.py --input ch13_part3_lightning.ipynb --output ch13_part3_lightning.py
[NbConvertApp] Converting notebook ch13_part3_lightning.ipynb to script [NbConvertApp] Writing 7321 bytes to ch13_part3_lightning.py