GitHub Repository: hackassin/learnopencv
Path: blob/master/Deep-Convolutional-GAN/PyTorch/DCGAN_Anime_Pytorch.ipynb
Kernel: Python 3 (ipykernel)
import os
import datetime

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.manual_seed(1)
<torch._C.Generator at 0x7fe20c186250>
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128
train_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),  # scale pixels to [-1, 1]
])
train_dataset = datasets.ImageFolder(root='../dcgan/anime', transform=train_transform)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
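As a quick sanity check (a sketch, assuming the anime-face dataset has been extracted to ../dcgan/anime), the loader should yield batches of 128 normalized 64x64 RGB tensors:

images, labels = next(iter(train_loader))
print(images.shape)                              # expected: torch.Size([128, 3, 64, 64])
print(images.min().item(), images.max().item())  # roughly -1.0 and 1.0 after Normalize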
def show_images(images):
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_xticks([]); ax.set_yticks([])
    ax.imshow(make_grid(images.detach(), nrow=22).permute(1, 2, 0))

def show_batch(dl):
    for images, _ in dl:
        show_images(images)
        break
show_batch(train_loader)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
[Output image: grid of training-set anime faces]
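The clipping warning above appears because Normalize maps pixels to [-1, 1] while imshow expects floats in [0, 1]. A minimal fix (a sketch, not part of the original notebook; the helper name show_images_denorm is ours) is to undo the normalization before plotting:

def show_images_denorm(images):
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.set_xticks([]); ax.set_yticks([])
    grid = make_grid(images.detach(), nrow=22)
    # invert Normalize(mean=0.5, std=0.5): [-1, 1] -> [0, 1]
    ax.imshow((grid * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0))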
image_shape = (3, 64, 64)
image_dim = int(np.prod(image_shape))
latent_dim = 100
# Custom weights initialization, applied to both generator and discriminator:
# conv weights ~ N(0, 0.02); batch-norm scales ~ N(1, 0.02) with zero bias,
# as recommended in the DCGAN paper.
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.zeros_(m.bias)
# Generator Model Class Definition
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            # Block 1: input is Z, going into a convolution
            nn.ConvTranspose2d(latent_dim, 64 * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.ReLU(True),
            # Block 2: (64 * 8) x 4 x 4
            nn.ConvTranspose2d(64 * 8, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.ReLU(True),
            # Block 3: (64 * 4) x 8 x 8
            nn.ConvTranspose2d(64 * 4, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.ReLU(True),
            # Block 4: (64 * 2) x 16 x 16
            nn.ConvTranspose2d(64 * 2, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            # Block 5: (64) x 32 x 32
            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
            nn.Tanh()
            # Output: (3) x 64 x 64
        )

    def forward(self, input):
        return self.main(input)
generator = Generator().to(device)
generator.apply(weights_init)
print(generator)
Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)
summary(generator, (100,1,1))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
   ConvTranspose2d-1            [-1, 512, 4, 4]         819,200
       BatchNorm2d-2            [-1, 512, 4, 4]           1,024
              ReLU-3            [-1, 512, 4, 4]               0
   ConvTranspose2d-4            [-1, 256, 8, 8]       2,097,152
       BatchNorm2d-5            [-1, 256, 8, 8]             512
              ReLU-6            [-1, 256, 8, 8]               0
   ConvTranspose2d-7          [-1, 128, 16, 16]         524,288
       BatchNorm2d-8          [-1, 128, 16, 16]             256
              ReLU-9          [-1, 128, 16, 16]               0
  ConvTranspose2d-10           [-1, 64, 32, 32]         131,072
      BatchNorm2d-11           [-1, 64, 32, 32]             128
             ReLU-12           [-1, 64, 32, 32]               0
  ConvTranspose2d-13            [-1, 3, 64, 64]           3,072
             Tanh-14            [-1, 3, 64, 64]               0
================================================================
Total params: 3,576,704
Trainable params: 3,576,704
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 3.00
Params size (MB): 13.64
Estimated Total Size (MB): 16.64
----------------------------------------------------------------
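The summary confirms the spatial upsampling 1x1 -> 4 -> 8 -> 16 -> 32 -> 64. One more check worth doing (a sketch, not in the original notebook): the final Tanh keeps generator outputs in [-1, 1], matching the Normalize range of the real images, so the discriminator sees both on the same scale.

with torch.no_grad():
    z = torch.randn(2, latent_dim, 1, 1, device=device)
    fake = generator(z)
print(fake.shape)                            # torch.Size([2, 3, 64, 64])
print(fake.min().item(), fake.max().item())  # within [-1, 1] thanks to Tanh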
# Discriminator Model Class Definition
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            # Block 1: (3) x 64 x 64
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 2: (64) x 32 x 32
            nn.Conv2d(64, 64 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 3: (64*2) x 16 x 16
            nn.Conv2d(64 * 2, 64 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 4: (64*4) x 8 x 8
            nn.Conv2d(64 * 4, 64 * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64 * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # Block 5: (64*8) x 4 x 4
            nn.Conv2d(64 * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
            nn.Flatten()
            # Output: 1
        )

    def forward(self, input):
        return self.main(input)
discriminator = Discriminator().to(device)
discriminator.apply(weights_init)
print(discriminator)
Discriminator(
  (main): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace=True)
    (11): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (12): Sigmoid()
    (13): Flatten(start_dim=1, end_dim=-1)
  )
)
summary(discriminator, (3,64,64))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 32, 32]           3,072
         LeakyReLU-2           [-1, 64, 32, 32]               0
            Conv2d-3          [-1, 128, 16, 16]         131,072
       BatchNorm2d-4          [-1, 128, 16, 16]             256
         LeakyReLU-5          [-1, 128, 16, 16]               0
            Conv2d-6            [-1, 256, 8, 8]         524,288
       BatchNorm2d-7            [-1, 256, 8, 8]             512
         LeakyReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 512, 4, 4]       2,097,152
      BatchNorm2d-10            [-1, 512, 4, 4]           1,024
        LeakyReLU-11            [-1, 512, 4, 4]               0
           Conv2d-12              [-1, 1, 1, 1]           8,192
          Sigmoid-13              [-1, 1, 1, 1]               0
          Flatten-14                    [-1, 1]               0
================================================================
Total params: 2,765,568
Trainable params: 2,765,568
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.05
Forward/backward pass size (MB): 2.31
Params size (MB): 10.55
Estimated Total Size (MB): 12.91
----------------------------------------------------------------
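Mirroring the generator check (again a sketch, not in the original notebook), the discriminator collapses a 64x64 image to a single probability per sample, so its output should have shape (N, 1) with values in (0, 1) from the Sigmoid:

with torch.no_grad():
    probs = discriminator(torch.randn(2, 3, 64, 64, device=device))
print(probs.shape)  # torch.Size([2, 1])
print(probs)        # values in (0, 1)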
adversarial_loss = nn.BCELoss()
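For reference, BCELoss on a predicted probability \(\hat{y}\) with target \(y \in \{0, 1\}\) averages

\ell(\hat{y}, y) = -\left[\, y \log \hat{y} + (1 - y) \log(1 - \hat{y}) \,\right]

over the batch. Training the discriminator with \(y = 1\) on real images and \(y = 0\) on fakes, and the generator with \(y = 1\) on its own fakes, recovers the standard non-saturating GAN objective.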
def generator_loss(fake_output, label):
    gen_loss = adversarial_loss(fake_output, label)
    return gen_loss
def discriminator_loss(output, label):
    disc_loss = adversarial_loss(output, label)
    return disc_loss
# Fixed noise: the same latent vectors reused across epochs, so generator
# progress is visually comparable from one epoch to the next
fixed_noise = torch.randn(128, latent_dim, 1, 1, device=device)
real_label = 1
fake_label = 0
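A minimal sketch of how fixed_noise can be used after (or during) training, reusing show_images from above; note the training loop below saves images from fresh noise instead:

with torch.no_grad():
    preview = generator(fixed_noise).cpu()
show_images(preview * 0.5 + 0.5)  # denormalize from [-1, 1] to [0, 1] for display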
learning_rate = 0.0002
G_optimizer = optim.Adam(generator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
D_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999))
num_epochs = 2
D_loss_plot, G_loss_plot = [], []

for epoch in range(1, num_epochs + 1):
    D_loss_list, G_loss_list = [], []

    for index, (real_images, _) in enumerate(train_loader):
        # ---- Train the discriminator: real batch, then fake batch ----
        D_optimizer.zero_grad()
        real_images = real_images.to(device)
        real_target = torch.ones(real_images.size(0), 1, device=device)
        fake_target = torch.zeros(real_images.size(0), 1, device=device)

        D_real_loss = discriminator_loss(discriminator(real_images), real_target)
        D_real_loss.backward()

        # Detach the generated images so generator gradients are not computed here
        noise_vector = torch.randn(real_images.size(0), latent_dim, 1, 1, device=device)
        generated_image = generator(noise_vector)
        output = discriminator(generated_image.detach())
        D_fake_loss = discriminator_loss(output, fake_target)
        D_fake_loss.backward()

        D_total_loss = D_real_loss + D_fake_loss
        D_loss_list.append(D_total_loss.item())  # .item() avoids retaining the graph
        D_optimizer.step()

        # ---- Train the generator with real labels on its fakes ----
        G_optimizer.zero_grad()
        G_loss = generator_loss(discriminator(generated_image), real_target)
        G_loss_list.append(G_loss.item())
        G_loss.backward()
        G_optimizer.step()

    epoch_D_loss = np.mean(D_loss_list)
    epoch_G_loss = np.mean(G_loss_list)
    print('Epoch: [%d/%d]: D_loss: %.3f, G_loss: %.3f' % (epoch, num_epochs, epoch_D_loss, epoch_G_loss))
    D_loss_plot.append(epoch_D_loss)
    G_loss_plot.append(epoch_G_loss)

    save_image(generated_image.data[:50], 'dcgan/torch/images/sample_%d.png' % epoch, nrow=5, normalize=True)
    torch.save(generator.state_dict(), 'dcgan/torch/training_weights/generator_epoch_%d.pth' % epoch)
    torch.save(discriminator.state_dict(), 'dcgan/torch/training_weights/discriminator_epoch_%d.pth' % epoch)
Epoch: [1/2]: D_loss: 0.565, G_loss: 5.909
Epoch: [2/2]: D_loss: 0.445, G_loss: 5.984
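To sample from a trained model later, it is enough to rebuild the Generator and load one of the saved state dicts. A sketch, assuming the epoch-2 weights path written by the loop above (the output filename generated_grid.png is ours):

sampler = Generator().to(device)
sampler.load_state_dict(torch.load('dcgan/torch/training_weights/generator_epoch_2.pth', map_location=device))
sampler.eval()  # switch batch-norm layers to inference mode

with torch.no_grad():
    samples = sampler(torch.randn(25, latent_dim, 1, 1, device=device))
save_image(samples, 'dcgan/torch/images/generated_grid.png', nrow=5, normalize=True)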