diff --git a/BE2_GAN_and_cGAN.ipynb b/BE2_GAN_and_cGAN.ipynb index 7243c8909272f2e776695baf70e3aea2aa9b3aef..ecf92bf479a06035a47b5767745db34230eaa30e 100644 --- a/BE2_GAN_and_cGAN.ipynb +++ b/BE2_GAN_and_cGAN.ipynb @@ -70,14 +70,277 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sIL7UvYAZx6L" - }, + "metadata": {}, "outputs": [], "source": [ - "#TO DO: your code here to adapt the code from the tutorial to experiment on MNIST dataset" + "# TODO: your code here to adapt the code from the tutorial to experiment on MNIST dataset\n", + "from __future__ import print_function\n", + "import argparse\n", + "import os\n", + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.parallel\n", + "import torch.backends.cudnn as cudnn\n", + "import torch.optim as optim\n", + "import torch.utils.data\n", + "import torchvision.datasets as dset\n", + "import torchvision.transforms as transforms\n", + "import torchvision.utils as vutils\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.animation as animation\n", + "from IPython.display import HTML\n", + "\n", + "# Set random seed for reproducibility\n", + "manualSeed = 999\n", + "#manualSeed = random.randint(1, 10000) # use if you want new results\n", + "print(\"Random Seed: \", manualSeed)\n", + "random.seed(manualSeed)\n", + "torch.manual_seed(manualSeed)\n", + "\n", + "# Root directory for dataset\n", + "dataroot = \"data/mnist\"\n", + "# Number of workers for dataloader\n", + "workers = 2\n", + "# Batch size during training\n", + "batch_size = 128\n", + "# Spatial size of training images. All images will be resized to this\n", + "# size using a transformer.\n", + "image_size = 64\n", + "# Number of channels in the training images. For color images this is 3\n", + "nc = 1\n", + "# Size of z latent vector (i.e. size of generator input)\n", + "nz = 100\n", + "# Size of feature maps in generator\n", + "ngf = 64\n", + "# Size of feature maps in discriminator\n", + "ndf = 64\n", + "# Number of training epochs\n", + "num_epochs = 5\n", + "# Learning rate for optimizers\n", + "lr = 0.0002\n", + "# Beta1 hyperparam for Adam optimizers\n", + "beta1 = 0.5\n", + "# Number of GPUs available. 
Use 0 for CPU mode.\n", + "ngpu = 1\n", + "\n", + "# MNIST images are grayscale (1 channel) and are resized to 64x64 to fit this DCGAN architecture.\n", + "# Create the dataset (downloaded automatically if it is not already under dataroot)\n", + "dataset = dset.MNIST(root=dataroot, download=True,\n", + " transform=transforms.Compose([\n", + " transforms.Resize(image_size),\n", + " transforms.CenterCrop(image_size),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.5,), (0.5,)),\n", + " ]))\n", + "# Create the dataloader\n", + "dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,\n", + " shuffle=True, num_workers=workers)\n", + "\n", + "# Decide which device we want to run on\n", + "device = torch.device(\"cuda:0\" if (torch.cuda.is_available() and ngpu > 0) else \"cpu\")\n", + "\n", + "# Plot some training images\n", + "real_batch = next(iter(dataloader))\n", + "plt.figure(figsize=(8,8))\n", + "plt.axis(\"off\")\n", + "plt.title(\"Training Images\")\n", + "plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0)))\n", + "\n", + "\n", + "# custom weights initialization called on netG and netD\n", + "def weights_init(m):\n", + " classname = m.__class__.__name__\n", + " if classname.find('Conv') != -1:\n", + " nn.init.normal_(m.weight.data, 0.0, 0.02)\n", + " elif classname.find('BatchNorm') != -1:\n", + " nn.init.normal_(m.weight.data, 1.0, 0.02)\n", + " nn.init.constant_(m.bias.data, 0)\n", + "\n", + "\n", + "# Generator Code\n", + "class Generator(nn.Module):\n", + " def __init__(self, ngpu):\n", + " super(Generator, self).__init__()\n", + " self.ngpu = ngpu\n", + " self.main = nn.Sequential(\n", + " # input is Z, going into a convolution\n", + " nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),\n", + " nn.BatchNorm2d(ngf * 8),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*8) x 4 x 4\n", + " nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf * 4),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*4) x 8 x 8\n", + " nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf * 2),\n", + " nn.ReLU(True),\n", + " # state size. (ngf*2) x 16 x 16\n", + " nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ngf),\n", + " nn.ReLU(True),\n", + " # state size. (ngf) x 32 x 32\n", + " nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),\n", + " nn.Tanh()\n", + " # state size. (nc) x 64 x 64\n", + " )\n", + "\n", + " def forward(self, input):\n", + " return self.main(input)\n", + "\n", + "class Discriminator(nn.Module):\n", + " def __init__(self, ngpu):\n", + " super(Discriminator, self).__init__()\n", + " self.ngpu = ngpu\n", + " self.main = nn.Sequential(\n", + " # input is (nc) x 64 x 64\n", + " nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf) x 32 x 32\n", + " nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 2),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf*2) x 16 x 16\n", + " nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 4),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. (ndf*4) x 8 x 8\n", + " nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),\n", + " nn.BatchNorm2d(ndf * 8),\n", + " nn.LeakyReLU(0.2, inplace=True),\n", + " # state size. 
(ndf*8) x 4 x 4\n", + " nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),\n", + " nn.Sigmoid()\n", + " )\n", + "\n", + " def forward(self, input):\n", + " return self.main(input)\n", + " \n", + "\n", + "# Create the generator and discriminator\n", + "netG = Generator(ngpu).to(device)\n", + "netD = Discriminator(ngpu).to(device)\n", + "\n", + "# Handle multi-gpu if desired\n", + "if (device.type == 'cuda') and (ngpu > 1):\n", + " netG = nn.DataParallel(netG, list(range(ngpu)))\n", + " netD = nn.DataParallel(netD, list(range(ngpu)))\n", + "\n", + "# Apply the weights_init function to randomly initialize all weights\n", + "# to mean=0, stdev=0.02.\n", + "netG.apply(weights_init)\n", + "netD.apply(weights_init)\n", + "\n", + "# Print the models\n", + "print(netG)\n", + "print(netD)\n", + "\n", + "# Initialize BCELoss function\n", + "criterion = nn.BCELoss()\n", + "\n", + "# Create batch of latent vectors that we will use to visualize\n", + "# the progression of the generator\n", + "fixed_noise = torch.randn(64, nz, 1, 1, device=device)\n", + "\n", + "# Establish convention for real and fake labels during training\n", + "real_label = 1.\n", + "fake_label = 0.\n", + "\n", + "# Setup Adam optimizers for both G and D\n", + "optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))\n", + "optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))\n", + "\n", + "\n", + "# Training Loop\n", + "\n", + "# Lists to keep track of progress\n", + "img_list = []\n", + "G_losses = []\n", + "D_losses = []\n", + "iters = 0\n", + "\n", + "print(\"Starting Training Loop...\")\n", + "# For each epoch\n", + "for epoch in range(num_epochs):\n", + " # For each batch in the dataloader\n", + " for i, data in enumerate(dataloader, 0):\n", + "\n", + " ############################\n", + " # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))\n", + " ###########################\n", + " ## Train with all-real batch\n", + " netD.zero_grad()\n", + " # Format batch\n", + " real_cpu = data[0].to(device)\n", + " b_size = real_cpu.size(0)\n", + " label = torch.full((b_size,), real_label, dtype=torch.float, device=device)\n", + " # Forward pass real batch through D\n", + " output = netD(real_cpu).view(-1)\n", + " # Calculate loss on all-real batch\n", + " errD_real = criterion(output, label)\n", + " # Calculate gradients for D in backward pass\n", + " errD_real.backward()\n", + " D_x = output.mean().item()\n", + "\n", + " ## Train with all-fake batch\n", + " # Generate batch of latent vectors\n", + " noise = torch.randn(b_size, nz, 1, 1, device=device)\n", + " # Generate fake image batch with G\n", + " fake = netG(noise)\n", + " label.fill_(fake_label)\n", + " # Classify all fake batch with D\n", + " output = netD(fake.detach()).view(-1)\n", + " # Calculate D's loss on the all-fake batch\n", + " errD_fake = criterion(output, label)\n", + " # Calculate the gradients for this batch, accumulated (summed) with previous gradients\n", + " errD_fake.backward()\n", + " D_G_z1 = output.mean().item()\n", + " # Compute error of D as sum over the fake and the real batches\n", + " errD = errD_real + errD_fake\n", + " # Update D\n", + " optimizerD.step()\n", + "\n", + " ############################\n", + " # (2) Update G network: maximize log(D(G(z)))\n", + " ###########################\n", + " netG.zero_grad()\n", + " label.fill_(real_label) # fake labels are real for generator cost\n", + " # Since we just updated D, perform another forward pass of all-fake batch through D\n", + " output = netD(fake).view(-1)\n", 
+ " # Calculate G's loss based on this output\n", + " errG = criterion(output, label)\n", + " # Calculate gradients for G\n", + " errG.backward()\n", + " D_G_z2 = output.mean().item()\n", + " # Update G\n", + " optimizerG.step()\n", + "\n", + " # Output training stats\n", + " if i % 50 == 0:\n", + " print('[%d/%d][%d/%d]\\tLoss_D: %.4f\\tLoss_G: %.4f\\tD(x): %.4f\\tD(G(z)): %.4f / %.4f'\n", + " % (epoch, num_epochs, i, len(dataloader),\n", + " errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))\n", + "\n", + " # Save Losses for plotting later\n", + " G_losses.append(errG.item())\n", + " D_losses.append(errD.item())\n", + "\n", + " # Check how the generator is doing by saving G's output on fixed_noise\n", + " if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):\n", + " with torch.no_grad():\n", + " fake = netG(fixed_noise).detach().cpu()\n", + " img_list.append(vutils.make_grid(fake, padding=2, normalize=True))\n", + "\n", + " iters += 1\n", + "\n", + "fig = plt.figure(figsize=(8,8))\n", + "plt.axis(\"off\")\n", + "ims = [[plt.imshow(np.transpose(i,(1,2,0)), animated=True)] for i in img_list]\n", + "ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)\n", + "\n", + "HTML(ani.to_jshtml())" ] }, { @@ -91,6 +354,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -105,10 +369,12 @@ "As we are not only trying to generate a random picture but a mapping between a picture to another one, we can't use the standard GAN architecture. We will then use a cGAN.\n", "\n", "A cGAN is a supervised GAN aiming at mapping a label picture to a real one or a real picture to a label one. As you can see in the diagram below, the discriminator will take as input a pair of images and try to predict if the pair was generated or not. The generator will not only generate an image from noise but will also use an image (label or real) to generate another one (real or label).\n", + "\n", "\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -118,6 +384,7 @@ "### Generator\n", "\n", "In the cGAN architecture, the generator chosen is a U-Net.\n", + "\n", "\n", "\n", "A U-Net takes as input an image, and outputs another image. \n", @@ -132,6 +399,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -139,6 +407,7 @@ }, "source": [ "The architecture we will implement is the following (the number in the square is the number of filters used).\n", + "\n", "\n", "\n", "The encoder will take as input a colored picture (3 channels: RGB), it will pass through a series of convolution layers to encode the features of the picture. It will then be decoded by the decoder using transposed convolutional layers. These layers will take as input the previous decoded vector AND the encoded features of the same level. 
" @@ -296,7 +565,7 @@ }, "outputs": [], "source": [ - " class U_Net(nn.Module):\n", + "class U_Net(nn.Module):\n", " ''' \n", " Ck denotes a Convolution-BatchNorm-ReLU layer with k filters.\n", " CDk denotes a Convolution-BatchNorm-Dropout-ReLU layer with a dropout rate of 50%\n", @@ -307,20 +576,32 @@ " '''\n", " def __init__(self, n_channels, n_classes):\n", " super(U_Net, self).__init__()\n", + "\n", " # Encoder\n", - " self.inc = inconv(n_channels, 64) # 64 filters\n", " # TO DO :\n", " # Create the 7 encoder layers called \"down1\" to \"down7\" following this sequence\n", " # C64 - C128 - C256 - C512 - C512 - C512 - C512 - C512\n", - " # The first one has already been implemented\n", - " \n", + " self.inc = inconv(n_channels, 64) # 64 filters\n", + "\n", + " self.down1 = down(64, 128)\n", + " self.down2 = down(128, 256)\n", + " self.down3 = down(256, 512)\n", + " self.down4 = down(512, 512)\n", + " self.down5 = down(512, 512)\n", + " self.down6 = down(512, 512)\n", + " self.down7 = down(512, 512)\n", " \n", " # Decoder\n", " # TO DO :\n", " # Create the 7 decoder layers called up1 to up7 following this sequence :\n", " # CD512 - CD1024 - CD1024 - C1024 - C1024 - C512 - C256 - C128\n", - " # The last layer has already been defined\n", - " \n", + " self.up7 = up(512, 512, dropout=True)\n", + " self.up6 = up(1024, 512, dropout=True)\n", + " self.up5 = up(1024, 512, dropout=True)\n", + " self.up4 = up(1024, 512)\n", + " self.up3 = up(1024, 256)\n", + " self.up2 = up(512, 128)\n", + " self.up1 = up(256, 64)\n", " \n", " self.outc = outconv(128, n_classes) # 128 filters\n", "\n", @@ -372,6 +653,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -379,12 +661,39 @@ }, "source": [ "<font color='red'>**Question 1**</font> \n", - "Knowing the input and output images will be 256x256, what will be the dimension of the encoded vector x8 ?\n", - "\n", + "Knowing the input and output images will be 256x256, what will be the dimension of the encoded vector x8 ?" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each down layer reduces the spatial dimensions of the input tensor by a factor of 2 (due to the use of a 4x4 convolutional kernel with a stride of 2), and increases the number of channels to 512. After 7 down layers + the input layer, the spatial dimensions will have been reduced by a factor of 2^8 = 256, resulting in a tensor with spatial dimensions of 256/256 = 1 and 512 channels. Therefore, the total number of elements in the tensor will be 1x1x512 = 512, which is the size of the encoded vector x8." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ "<font color='red'>**Question 2**</font> \n", "As you can see, U-net has an encoder-decoder architecture with skip connections. Explain why it works better than a traditional encoder-decoder." ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "The skip connections in U-Net allow the network to retain information from earlier layers that would otherwise be lost in the encoding process. By connecting the output of each encoding layer to the corresponding decoding layer, the U-Net recovers fine-grained spatial details and contours that may have been lost during downsampling.\n", + "\n", + "This enables better feature propagation from the encoder to the decoder, and improves the quality of the output by incorporating high-resolution information from the input image. 
Skip connections also shorten the gradient paths between the loss and the first layers of the encoder, which makes this very deep network easier to train.\n", + "\n", + "By contrast, a plain encoder-decoder has to push all the information through the 1x1x512 bottleneck, so its decoder must reconstruct fine details from this compressed representation alone, which typically produces blurrier outputs." ] }, { "cell_type": "markdown", "metadata": { @@ -487,6 +796,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "colab_type": "text", @@ -494,9 +804,15 @@ }, "source": [ "<font color='red'>**Question 3**</font> \n", - "Knowing the input and output images will be 256x256, what will be the dimension of the encoded vector x8 ?Knowing input images will be 256x256 with 3 channels each, how many parameters are there to learn ?" + "Knowing the input and output images will be 256x256, what will be the dimension of the encoded vector x8 ? Knowing input images will be 256x256 with 3 channels each, how many parameters are there to learn ?" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -512,10 +828,10 @@ " super(PatchGAN, self).__init__()\n", " # TODO :\n", " # create the 4 first layers named conv1 to conv4\n", - " self.conv1 =\n", - " self.conv2 =\n", - " self.conv3 =\n", - " self.conv4 =\n", + " self.conv1 = conv_block(n_channels, 64, use_batchnorm=False, stride=2)\n", + " self.conv2 = conv_block(64, 128, use_batchnorm=True, stride=2)\n", + " self.conv3 = conv_block(128, 256, use_batchnorm=True, stride=2)\n", + " self.conv4 = conv_block(256, 512, use_batchnorm=True, stride=1)\n", " # output layer\n", " self.out = out_block(512, n_classes)\n", " \n", @@ -930,7 +1246,7 @@ "# ----------\n", "\n", "losses = []\n", - "num_epochs = 200\n", + "num_epochs = 201\n", "\n", "# Initialize weights\n", "generator.apply(weights_init_normal)\n", @@ -962,12 +1278,17 @@ "\n", " # GAN loss\n", " # TO DO: Put here your GAN loss\n", + " fake_A = generator(real_B)\n", + " pred_fake = discriminator(fake_A, real_B) # no .detach() here: the GAN loss must backpropagate into the generator\n", + " loss_GAN = criterion_GAN(pred_fake, valid)\n", "\n", + " # Pixel-wise loss\n", " # TO DO: Put here your pixel loss\n", + " loss_pixelwise = criterion_pixelwise(real_A, fake_A)\n", "\n", " # Total loss\n", " # TO DO: Put here your total loss\n", + " loss_G = loss_GAN + lambda_pixel * loss_pixelwise\n", "\n", " loss_G.backward()\n", "\n", @@ -1003,7 +1324,7 @@ " losses.append((loss_D.item(), loss_G.item()))\n", " if epoch % 100 == 0:\n", " print('Saving model...')\n", - " save_model(epoch)\n" + " save_model(epoch)" ] }, { @@ -1170,37 +1491,71 @@ }, "outputs": [], "source": [ - "# TO DO : Your code here to load and evaluate with a few samples\n", - "# a model after 100 epochs\n", - "\n" + "# TO DO : Your code here to load and evaluate with a few samples a model after 100 epochs\n", + "\n", + "load_model(epoch=100)\n", + "# switch the generator to evaluation mode\n", + "generator.eval()\n", + "\n", + "# show a sample evaluation image on the validation dataset\n", + "image, mask = next(iter(val_dataloader))\n", + "output = generator(mask.type(Tensor))\n", + "output = output.view(8, 3, 256, 256)\n", + "output = output.cpu().detach()\n", + "for i in range(8):\n", + " image_plot = reverse_transform(image[i])\n", + " output_plot = reverse_transform(output[i])\n", + " mask_plot = reverse_transform(mask[i])\n", + " plot2x3Array(mask_plot,image_plot,output_plot)" ] }, { "cell_type": "code", "execution_count": null, 
- "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_GbMIfRXBUhH" - }, + "metadata": {}, "outputs": [], "source": [ - "# And finally :\n", - "if cuda:\n", - " torch.cuda.empty_cache()" + "# Visualize the results after 200 epochs\n", + "\n", + "load_model(200)\n", + "# switching to evaluation mode\n", + "generator.eval()\n", + "\n", + "image, mask = next(iter(val_dataloader))\n", + "output200 = generator(mask.type(Tensor))\n", + "output200 = output200.view(8, 3, 256, 256)\n", + "output200 = output200.cpu().detach()\n", + "\n", + "print(\"After 200 epochs:\")\n", + "for i in range(3):\n", + " image_plot = reverse_transform(image[i])\n", + " output_plot = reverse_transform(output200[i])\n", + " mask_plot = reverse_transform(mask[i])\n", + " plot2x3Array(mask_plot,image_plot,output_plot)" ] }, { "attachments": {}, "cell_type": "markdown", + "metadata": {}, + "source": [ + "The difference between the 100-epochs trained model and 200-epoches trained model are not obvious.\n", + "There is a little difference" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { - "colab_type": "text", - "id": "rVxSSPJgK60P" + "colab": {}, + "colab_type": "code", + "id": "_GbMIfRXBUhH" }, + "outputs": [], "source": [ - "# How to submit your Work ?\n", - "\n", - "This work must be done individually. The expected output is a repository named gan-cgan on https://gitlab.ec-lyon.fr. It must contain your notebook (or python files) and a README.md file that explains briefly the successive steps of the project. The last commit is due before 11:59 pm on Wednesday, March 29, 2023. Subsequent commits will not be considered." + "# And finally :\n", + "if cuda:\n", + " torch.cuda.empty_cache()" ] } ],