# coding: utf-8


import sys
from python_environment_check import check_packages
import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset
import pathlib
import matplotlib.pyplot as plt
import os
from PIL import Image
from IPython.display import Image as IPythonImage
import torchvision.transforms as transforms
import torchvision
from itertools import islice

# # Machine Learning with PyTorch and Scikit-Learn
# # -- Code Examples

# ## Package version checks

# Add folder to path in order to load from the check_packages.py script:


sys.path.insert(0, '..')


# Check recommended package versions:


d = {
    'numpy': '1.21.2',
    'matplotlib': '3.4.3',
    'torch': '1.9.0',
}
check_packages(d)


# # Chapter 12: Parallelizing Neural Network Training with PyTorch (Part 1/2)
#

# - [PyTorch and training performance](#PyTorch-and-training-performance)
#   - [Performance challenges](#Performance-challenges)
#   - [What is PyTorch?](#What-is-PyTorch?)
#   - [How we will learn PyTorch](#How-we-will-learn-PyTorch)
# - [First steps with PyTorch](#First-steps-with-PyTorch)
#   - [Installing PyTorch](#Installing-PyTorch)
#   - [Creating tensors in PyTorch](#Creating-tensors-in-PyTorch)
#   - [Manipulating the data type and shape of a tensor](#Manipulating-the-data-type-and-shape-of-a-tensor)
#   - [Applying mathematical operations to tensors](#Applying-mathematical-operations-to-tensors)
#   - [Split, stack, and concatenate tensors](#Split,-stack,-and-concatenate-tensors)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).


# ## PyTorch and training performance

# ### Performance challenges


IPythonImage(filename='figures/12_01.png', width=500)


# ### What is PyTorch?


IPythonImage(filename='figures/12_02.png', width=500)


# ### How we will learn PyTorch

# ## First steps with PyTorch

# ### Installing PyTorch


#! pip install torch
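
# If you plan to train on a GPU, it is worth checking that the installed build
# can actually see one. The following is a small supplementary sketch (not part
# of the original book code) that relies only on the stable `torch.cuda` API:


if torch.cuda.is_available():
    # Report the default CUDA device that GPU tensors would be placed on
    print('CUDA device:', torch.cuda.get_device_name(0))
else:
    print('No CUDA device found; tensors will be computed on the CPU.')
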
print('PyTorch version:', torch.__version__)

np.set_printoptions(precision=3)


# ### Creating tensors in PyTorch


a = [1, 2, 3]
b = np.array([4, 5, 6], dtype=np.int32)

t_a = torch.tensor(a)
t_b = torch.from_numpy(b)

print(t_a)
print(t_b)


torch.is_tensor(a), torch.is_tensor(t_a)


t_ones = torch.ones(2, 3)

t_ones.shape


print(t_ones)


rand_tensor = torch.rand(2, 3)

print(rand_tensor)


# ### Manipulating the data type and shape of a tensor


t_a_new = t_a.to(torch.int64)

print(t_a_new.dtype)


t = torch.rand(3, 5)

t_tr = torch.transpose(t, 0, 1)
print(t.shape, ' --> ', t_tr.shape)


t = torch.zeros(30)

t_reshape = t.reshape(5, 6)

print(t_reshape.shape)


t = torch.zeros(1, 2, 1, 4, 1)

t_sqz = torch.squeeze(t, 2)

print(t.shape, ' --> ', t_sqz.shape)


# ### Applying mathematical operations to tensors


torch.manual_seed(1)

t1 = 2 * torch.rand(5, 2) - 1
t2 = torch.normal(mean=0, std=1, size=(5, 2))


t3 = torch.multiply(t1, t2)
print(t3)


t4 = torch.mean(t1, axis=0)
print(t4)


t5 = torch.matmul(t1, torch.transpose(t2, 0, 1))

print(t5)


t6 = torch.matmul(torch.transpose(t1, 0, 1), t2)

print(t6)


norm_t1 = torch.linalg.norm(t1, ord=2, dim=1)

print(norm_t1)


np.sqrt(np.sum(np.square(t1.numpy()), axis=1))


# ### Split, stack, and concatenate tensors


torch.manual_seed(1)

t = torch.rand(6)

print(t)

t_splits = torch.chunk(t, 3)

[item.numpy() for item in t_splits]


torch.manual_seed(1)
t = torch.rand(5)

print(t)

t_splits = torch.split(t, split_size_or_sections=[3, 2])

[item.numpy() for item in t_splits]


A = torch.ones(3)
B = torch.zeros(2)

C = torch.cat([A, B], axis=0)
print(C)


A = torch.ones(3)
B = torch.zeros(3)

S = torch.stack([A, B], axis=1)
print(S)


# ## Building input pipelines in PyTorch

# ### Creating a PyTorch DataLoader from existing tensors


t = torch.arange(6, dtype=torch.float32)
data_loader = DataLoader(t)


for item in data_loader:
    print(item)


data_loader = DataLoader(t, batch_size=3, drop_last=False)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)


# ### Combining two tensors into a joint dataset


class JointDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


torch.manual_seed(1)

t_x = torch.rand([4, 3], dtype=torch.float32)
t_y = torch.arange(4)
joint_dataset = JointDataset(t_x, t_y)

# Or use TensorDataset directly:
joint_dataset = TensorDataset(t_x, t_y)

for example in joint_dataset:
    print(' x: ', example[0],
          ' y: ', example[1])


# ### Shuffle, batch, and repeat


torch.manual_seed(1)
data_loader = DataLoader(dataset=joint_dataset, batch_size=2, shuffle=True)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', 'x:', batch[0],
          '\n y:', batch[1])

for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0],
              '\n y:', batch[1])
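
# As a supplementary preview (not part of the original book code), here is the
# epoch/batch pattern in which such a shuffled DataLoader is typically
# consumed; the toy linear model, loss, and learning rate below are
# illustrative placeholders only:


model = torch.nn.Linear(3, 1)    # toy model matching t_x's three features
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(2):
    # shuffle=True re-shuffles the examples at the start of every epoch
    for x_batch, y_batch in data_loader:
        pred = model(x_batch).squeeze(1)
        loss = loss_fn(pred, y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
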
# ### Creating a dataset from files on your local storage disk


imgdir_path = pathlib.Path('cat_dog_images')

file_list = sorted([str(path) for path in imgdir_path.glob('*.jpg')])

print(file_list)


fig = plt.figure(figsize=(10, 5))
for i, file in enumerate(file_list):
    img = Image.open(file)
    print('Image shape: ', np.array(img).shape)
    ax = fig.add_subplot(2, 3, i+1)
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(img)
    ax.set_title(os.path.basename(file), size=15)

#plt.savefig('figures/12_03.pdf')
plt.tight_layout()
plt.show()


labels = [1 if 'dog' in os.path.basename(file) else 0
          for file in file_list]
print(labels)


class ImageDataset(Dataset):
    def __init__(self, file_list, labels):
        self.file_list = file_list
        self.labels = labels

    def __getitem__(self, index):
        file = self.file_list[index]
        label = self.labels[index]
        return file, label

    def __len__(self):
        return len(self.labels)

image_dataset = ImageDataset(file_list, labels)
for file, label in image_dataset:
    print(file, label)


class ImageDataset(Dataset):
    def __init__(self, file_list, labels, transform=None):
        self.file_list = file_list
        self.labels = labels
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(self.file_list[index])
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        return img, label

    def __len__(self):
        return len(self.labels)

img_height, img_width = 80, 120

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((img_height, img_width)),
])

image_dataset = ImageDataset(file_list, labels, transform)


fig = plt.figure(figsize=(10, 6))
for i, example in enumerate(image_dataset):
    ax = fig.add_subplot(2, 3, i+1)
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(example[0].numpy().transpose((1, 2, 0)))
    ax.set_title(f'{example[1]}', size=15)

plt.tight_layout()
plt.savefig('figures/12_04.pdf')
plt.show()
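
# As a supplementary sketch (not part of the original book code), the
# transformed `image_dataset` can be wrapped in a DataLoader just like the
# tensor datasets above; with the six cat/dog images loaded earlier, the batch
# size of 3 chosen here yields two batches of stacked image tensors and labels:


img_loader = DataLoader(image_dataset, batch_size=3, shuffle=True)

for i, (imgs, lbls) in enumerate(img_loader, 1):
    # Each batch stacks the transformed images into shape [3, 3, 80, 120];
    # the integer labels are collated into a tensor automatically
    print(f'batch {i}:', imgs.shape, 'labels:', lbls)
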
# ### Fetching available datasets from the torchvision.datasets library


# ! pip install torchvision


# **Fetching CelebA dataset**
#
# ---

# 1. Downloading the image files manually
#
# - You can try setting `download=True` below. If this results in a `BadZipfile` error, we recommend downloading the `img_align_celeba.zip` file manually from http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html. In the Google Drive folder, you can find it under the `Img` folder as shown below:


IPythonImage(filename='figures/gdrive-download-location-1.png', width=500)


# - You can also try this direct link: https://drive.google.com/file/d/0B7EVK8r0v71pZjFTYXZWM3FlRnM/view?usp=sharing&resourcekey=0-dYn9z10tMJOBAkviAcfdyQ
# - After downloading, please put this file into the `./celeba` subfolder and unzip it.

# 2. Next, you need to download the annotation files and put them into the same `./celeba` subfolder. The annotation files can be found under `Anno`:


IPythonImage(filename='figures/gdrive-download-location-2.png', width=300)


# - Direct links are provided below:
#   - [identity_CelebA.txt](https://drive.google.com/file/d/1_ee_0u7vcNLOfNLegJRHmolfH5ICW-XS/view?usp=sharing)
#   - [list_attr_celeba.txt](https://drive.google.com/file/d/0B7EVK8r0v71pblRyaVFSWGxPY0U/view?usp=sharing&resourcekey=0-YW2qIuRcWHy_1C2VaRGL3Q)
#   - [list_bbox_celeba.txt](https://drive.google.com/file/d/0B7EVK8r0v71pbThiMVRxWXZ4dU0/view?usp=sharing&resourcekey=0-z-17UMo1wt4moRL2lu9D8A)
#   - [list_landmarks_align_celeba.txt](https://drive.google.com/file/d/0B7EVK8r0v71pd0FJY3Blby1HUTQ/view?usp=sharing&resourcekey=0-aFtzLN5nfdhHXpAsgYA8_g)
#   - [list_landmarks_celeba.txt](https://drive.google.com/file/d/0B7EVK8r0v71pTzJIdlJWdHczRlU/view?usp=sharing&resourcekey=0-49BtYuqFDomi-1v0vNVwrQ)


IPythonImage(filename='figures/gdrive-download-location-3.png', width=300)


# 3. Lastly, you need to download the file `list_eval_partition.txt` and place it under `./celeba`:
#
# - [list_eval_partition.txt](https://drive.google.com/file/d/0B7EVK8r0v71pY0NSMzRuSXJEVkk/view?usp=sharing&resourcekey=0-i4TGCi_51OtQ5K9FSp4EDg)

# After completing steps 1-3 above, please ensure that the following files are present in your `./celeba` subfolder and that they are non-empty (that is, that their file sizes are similar to those shown below):


IPythonImage(filename='figures/celeba-files.png', width=400)


# ---


image_path = './'
celeba_dataset = torchvision.datasets.CelebA(
    image_path, split='train', target_type='attr', download=False
)

assert isinstance(celeba_dataset, torch.utils.data.Dataset)


example = next(iter(celeba_dataset))
print(example)


fig = plt.figure(figsize=(12, 8))
for i, (image, attributes) in islice(enumerate(celeba_dataset), 18):
    ax = fig.add_subplot(3, 6, i+1)
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(image)
    ax.set_title(f'{attributes[31]}', size=15)

#plt.savefig('figures/12_05.pdf')
plt.show()


mnist_dataset = torchvision.datasets.MNIST(image_path, train=True, download=True)

assert isinstance(mnist_dataset, torch.utils.data.Dataset)

example = next(iter(mnist_dataset))
print(example)

fig = plt.figure(figsize=(15, 6))
for i, (image, label) in islice(enumerate(mnist_dataset), 10):
    ax = fig.add_subplot(2, 5, i+1)
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(image, cmap='gray_r')
    ax.set_title(f'{label}', size=15)

#plt.savefig('figures/12_06.pdf')
plt.show()


# ---
#
# Readers may ignore the next cell.
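
# The following optional cell is a supplementary sketch (not part of the
# original book code) showing that a fetched torchvision dataset plugs into
# the same DataLoader machinery built earlier in this chapter; it assumes the
# MNIST files were already downloaded into `image_path` by the cell above:


mnist_tensors = torchvision.datasets.MNIST(
    image_path, train=True, download=False,
    # convert each PIL image to a [1, 28, 28] float tensor
    transform=transforms.ToTensor()
)

mnist_loader = DataLoader(mnist_tensors, batch_size=32, shuffle=True)

images, labels = next(iter(mnist_loader))
print(images.shape)   # expected: torch.Size([32, 1, 28, 28])
print(labels[:5])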