diff --git a/TD2_Deep_Learning-2.ipynb b/TD2_Deep_Learning-2.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..7fe9af3c70995528f7d5e767ae69dbadde0b9545
--- /dev/null
+++ b/TD2_Deep_Learning-2.ipynb
@@ -0,0 +1,3039 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "7edf7168",
+      "metadata": {
+        "id": "7edf7168"
+      },
+      "source": [
+        "# TD2: Deep learning"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fbb8c8df",
+      "metadata": {
+        "id": "fbb8c8df"
+      },
+      "source": [
+        "In this TD, you must modify this notebook to answer the questions. To do this,\n",
+        "\n",
+        "1. Fork this repository\n",
+        "2. Clone your forked repository on your local computer\n",
+        "3. Answer the questions\n",
+        "4. Commit and push regularly\n",
+        "\n",
+        "The last commit is due on Wednesday, December 4, 11:59 PM. Later commits will not be taken into account."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "3d167a29",
+      "metadata": {
+        "id": "3d167a29"
+      },
+      "source": [
+        "Install and test PyTorch from  https://pytorch.org/get-started/locally."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "330a42f5",
+      "metadata": {
+        "id": "330a42f5"
+      },
+      "outputs": [],
+      "source": [
+        "#pip install torch torchvision"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "0882a636",
+      "metadata": {
+        "id": "0882a636"
+      },
+      "source": [
+        "\n",
+        "To test run the following code"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "id": "b1950f0a",
+      "metadata": {
+        "id": "b1950f0a",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "1f2d576a-0c79-4dfa-8bb8-c2fbdff89ff8"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "tensor([[-2.9149e-01, -2.9791e-01, -1.5167e+00, -1.3063e+00, -2.0891e+00,\n",
+            "          2.0112e+00, -1.3604e+00, -1.5728e+00, -4.2890e-03, -5.9501e-01],\n",
+            "        [-3.8356e-01, -1.3039e+00, -1.6887e+00, -4.2747e-01,  2.6056e-01,\n",
+            "         -1.7091e+00, -9.3667e-01, -6.9102e-01, -1.2756e-02, -3.4600e-01],\n",
+            "        [ 4.9706e-01, -8.0328e-01, -1.7984e-01,  3.6717e-01, -9.8263e-01,\n",
+            "         -1.1355e-02,  3.1077e-01,  9.0327e-01,  1.1605e+00,  3.6590e-01],\n",
+            "        [-1.4779e+00, -5.5587e-01,  4.2844e-03, -5.6485e-01, -2.2629e-01,\n",
+            "         -6.9693e-01,  6.7294e-01,  5.0569e-01,  9.2654e-01,  1.7701e+00],\n",
+            "        [-2.0379e+00,  9.2532e-01,  1.3645e+00,  2.0570e+00, -5.9445e-02,\n",
+            "          1.1029e+00, -6.6467e-01, -2.4736e+00,  3.5116e-01, -1.1571e+00],\n",
+            "        [-7.7662e-02, -5.0340e-01,  4.3923e-01,  7.0893e-01,  6.5542e-01,\n",
+            "         -3.3269e-01, -5.6805e-01, -3.0578e-01,  4.7772e-01, -7.4755e-01],\n",
+            "        [-9.3990e-01,  1.4306e+00, -1.2102e+00,  9.0100e-01, -6.5693e-01,\n",
+            "          5.6183e-01, -1.7710e+00,  1.7879e-01, -1.5684e+00, -3.7099e-01],\n",
+            "        [-3.7614e-02, -1.8091e-04,  7.7279e-01, -2.2848e-01, -8.2718e-01,\n",
+            "         -1.7331e-01,  1.1209e-01, -2.1531e+00,  4.5847e-01, -3.0474e-01],\n",
+            "        [-1.4236e+00, -3.1073e-01, -4.6860e-01,  8.1234e-01,  1.3746e-01,\n",
+            "         -1.3021e+00,  1.3348e+00,  9.4073e-01,  3.3143e-01,  6.5072e-01],\n",
+            "        [-1.1577e-01, -5.1197e-01,  1.5164e-01, -9.8843e-03, -5.9513e-01,\n",
+            "         -1.2165e-01,  1.9136e+00, -1.2140e+00,  6.9562e-01, -2.2402e-01],\n",
+            "        [ 1.4074e+00, -4.5704e-01,  1.7650e-01, -7.5354e-01, -1.3587e+00,\n",
+            "          5.9109e-01, -8.4399e-01,  7.1660e-01,  6.4473e-01,  5.4493e-01],\n",
+            "        [-1.1563e+00,  9.9369e-01, -1.3648e-01, -2.8204e+00,  2.4360e-01,\n",
+            "         -7.4776e-01,  5.0526e-01,  1.0538e+00, -1.4164e+00, -1.1326e-01],\n",
+            "        [ 6.1103e-01, -2.5303e-01, -1.0949e+00,  9.7218e-01, -2.0042e-01,\n",
+            "         -3.8988e-01, -1.3344e-01, -8.1450e-01, -3.0165e-01,  1.7991e+00],\n",
+            "        [-7.1659e-01, -3.7019e-01, -4.1402e-01, -1.6006e+00, -1.3978e+00,\n",
+            "         -1.5548e-01, -9.8952e-01,  5.5218e-01,  4.0877e-01, -1.6947e+00]])\n",
+            "AlexNet(\n",
+            "  (features): Sequential(\n",
+            "    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n",
+            "    (1): ReLU(inplace=True)\n",
+            "    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
+            "    (4): ReLU(inplace=True)\n",
+            "    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "    (7): ReLU(inplace=True)\n",
+            "    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "    (9): ReLU(inplace=True)\n",
+            "    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "    (11): ReLU(inplace=True)\n",
+            "    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  )\n",
+            "  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n",
+            "  (classifier): Sequential(\n",
+            "    (0): Dropout(p=0.5, inplace=False)\n",
+            "    (1): Linear(in_features=9216, out_features=4096, bias=True)\n",
+            "    (2): ReLU(inplace=True)\n",
+            "    (3): Dropout(p=0.5, inplace=False)\n",
+            "    (4): Linear(in_features=4096, out_features=4096, bias=True)\n",
+            "    (5): ReLU(inplace=True)\n",
+            "    (6): Linear(in_features=4096, out_features=1000, bias=True)\n",
+            "  )\n",
+            ")\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "\n",
+        "N, D = 14, 10\n",
+        "x = torch.randn(N, D).type(torch.FloatTensor)\n",
+        "print(x)\n",
+        "\n",
+        "from torchvision import models\n",
+        "\n",
+        "alexnet = models.alexnet()\n",
+        "print(alexnet)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "23f266da",
+      "metadata": {
+        "id": "23f266da"
+      },
+      "source": [
+        "## Exercise 1: CNN on CIFAR10\n",
+        "\n",
+        "The goal is to apply a Convolutional Neural Net (CNN) model on the CIFAR10 image dataset and test the accuracy of the model on the basis of image classification. Compare the Accuracy VS the neural network implemented during TD1.\n",
+        "\n",
+        "Have a look at the following documentation to be familiar with PyTorch.\n",
+        "\n",
+        "https://pytorch.org/tutorials/beginner/pytorch_with_examples.html\n",
+        "\n",
+        "https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "4ba1c82d",
+      "metadata": {
+        "id": "4ba1c82d"
+      },
+      "source": [
+        "You can test if GPU is available on your machine and thus train on it to speed up the process"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 15,
+      "id": "6e18f2fd",
+      "metadata": {
+        "id": "6e18f2fd",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "8345a666-e787-499a-8861-30cd663ba3c0"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "CUDA is available!  Training on GPU ...\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch\n",
+        "\n",
+        "# check if CUDA is available\n",
+        "train_on_gpu = torch.cuda.is_available()\n",
+        "\n",
+        "if not train_on_gpu:\n",
+        "    print(\"CUDA is not available.  Training on CPU ...\")\n",
+        "else:\n",
+        "    print(\"CUDA is available!  Training on GPU ...\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5cf214eb",
+      "metadata": {
+        "id": "5cf214eb"
+      },
+      "source": [
+        "Next we load the CIFAR10 dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 16,
+      "id": "462666a2",
+      "metadata": {
+        "id": "462666a2",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "a63aa205-2bce-46c1-97c3-8cd9a2977bbb"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 170M/170M [00:14<00:00, 12.0MB/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Extracting data/cifar-10-python.tar.gz to data\n",
+            "Files already downloaded and verified\n"
+          ]
+        }
+      ],
+      "source": [
+        "import numpy as np\n",
+        "from torchvision import datasets, transforms\n",
+        "from torch.utils.data.sampler import SubsetRandomSampler\n",
+        "\n",
+        "# number of subprocesses to use for data loading\n",
+        "num_workers = 0\n",
+        "# how many samples per batch to load\n",
+        "batch_size = 20\n",
+        "# percentage of training set to use as validation\n",
+        "valid_size = 0.2\n",
+        "\n",
+        "# convert data to a normalized torch.FloatTensor\n",
+        "transform = transforms.Compose(\n",
+        "    [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]\n",
+        ")\n",
+        "\n",
+        "# choose the training and test datasets\n",
+        "train_data = datasets.CIFAR10(\"data\", train=True, download=True, transform=transform)\n",
+        "test_data = datasets.CIFAR10(\"data\", train=False, download=True, transform=transform)\n",
+        "\n",
+        "# obtain training indices that will be used for validation\n",
+        "num_train = len(train_data)\n",
+        "indices = list(range(num_train))\n",
+        "np.random.shuffle(indices)\n",
+        "split = int(np.floor(valid_size * num_train))\n",
+        "train_idx, valid_idx = indices[split:], indices[:split]\n",
+        "\n",
+        "# define samplers for obtaining training and validation batches\n",
+        "train_sampler = SubsetRandomSampler(train_idx)\n",
+        "valid_sampler = SubsetRandomSampler(valid_idx)\n",
+        "\n",
+        "# prepare data loaders (combine dataset and sampler)\n",
+        "train_loader = torch.utils.data.DataLoader(\n",
+        "    train_data, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers\n",
+        ")\n",
+        "valid_loader = torch.utils.data.DataLoader(\n",
+        "    train_data, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers\n",
+        ")\n",
+        "test_loader = torch.utils.data.DataLoader(\n",
+        "    test_data, batch_size=batch_size, num_workers=num_workers\n",
+        ")\n",
+        "\n",
+        "# specify the image classes\n",
+        "classes = [\n",
+        "    \"airplane\",\n",
+        "    \"automobile\",\n",
+        "    \"bird\",\n",
+        "    \"cat\",\n",
+        "    \"deer\",\n",
+        "    \"dog\",\n",
+        "    \"frog\",\n",
+        "    \"horse\",\n",
+        "    \"ship\",\n",
+        "    \"truck\",\n",
+        "]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "58ec3903",
+      "metadata": {
+        "id": "58ec3903"
+      },
+      "source": [
+        "CNN definition (this one is an example)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 17,
+      "id": "317bf070",
+      "metadata": {
+        "id": "317bf070",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "0c3a0a99-8c7c-49bf-a2b2-57212a45b6a1"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Net(\n",
+            "  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n",
+            "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
+            "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
+            "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
+            "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
+            ")\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch.nn as nn\n",
+        "import torch.nn.functional as F\n",
+        "\n",
+        "# define the CNN architecture\n",
+        "\n",
+        "\n",
+        "class Net(nn.Module):\n",
+        "    def __init__(self):\n",
+        "        super(Net, self).__init__()\n",
+        "        self.conv1 = nn.Conv2d(3, 6, 5)\n",
+        "        self.pool = nn.MaxPool2d(2, 2)\n",
+        "        self.conv2 = nn.Conv2d(6, 16, 5)\n",
+        "        self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
+        "        self.fc2 = nn.Linear(120, 84)\n",
+        "        self.fc3 = nn.Linear(84, 10)\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        x = self.pool(F.relu(self.conv1(x)))\n",
+        "        x = self.pool(F.relu(self.conv2(x)))\n",
+        "        x = x.view(-1, 16 * 5 * 5)\n",
+        "        x = F.relu(self.fc1(x))\n",
+        "        x = F.relu(self.fc2(x))\n",
+        "        x = self.fc3(x)\n",
+        "        return x\n",
+        "\n",
+        "\n",
+        "# create a complete CNN\n",
+        "model = Net()\n",
+        "print(model)\n",
+        "# move tensors to GPU if CUDA is available\n",
+        "if train_on_gpu:\n",
+        "    model.cuda()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a2dc4974",
+      "metadata": {
+        "id": "a2dc4974"
+      },
+      "source": [
+        "Loss function and training using SGD (Stochastic Gradient Descent) optimizer"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "id": "4b53f229",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4b53f229",
+        "outputId": "352fa2f2-4cf7-42b6-e51a-c68bf428e90c"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch: 0 \tTraining Loss: 35.027589 \tValidation Loss: 31.574753\n",
+            "Validation loss decreased (inf --> 31.574753).  Saving model ...\n",
+            "Epoch: 1 \tTraining Loss: 30.186101 \tValidation Loss: 29.019872\n",
+            "Validation loss decreased (31.574753 --> 29.019872).  Saving model ...\n",
+            "Epoch: 2 \tTraining Loss: 27.864272 \tValidation Loss: 27.127275\n",
+            "Validation loss decreased (29.019872 --> 27.127275).  Saving model ...\n",
+            "Epoch: 3 \tTraining Loss: 26.258431 \tValidation Loss: 26.395611\n",
+            "Validation loss decreased (27.127275 --> 26.395611).  Saving model ...\n",
+            "Epoch: 4 \tTraining Loss: 24.996900 \tValidation Loss: 25.757890\n",
+            "Validation loss decreased (26.395611 --> 25.757890).  Saving model ...\n",
+            "Epoch: 5 \tTraining Loss: 23.965784 \tValidation Loss: 25.668354\n",
+            "Validation loss decreased (25.757890 --> 25.668354).  Saving model ...\n",
+            "Epoch: 6 \tTraining Loss: 23.020208 \tValidation Loss: 24.161515\n",
+            "Validation loss decreased (25.668354 --> 24.161515).  Saving model ...\n",
+            "Epoch: 7 \tTraining Loss: 22.237902 \tValidation Loss: 23.424892\n",
+            "Validation loss decreased (24.161515 --> 23.424892).  Saving model ...\n",
+            "Epoch: 8 \tTraining Loss: 21.415062 \tValidation Loss: 23.035766\n",
+            "Validation loss decreased (23.424892 --> 23.035766).  Saving model ...\n",
+            "Epoch: 9 \tTraining Loss: 20.656158 \tValidation Loss: 22.670803\n",
+            "Validation loss decreased (23.035766 --> 22.670803).  Saving model ...\n",
+            "Epoch: 10 \tTraining Loss: 19.973196 \tValidation Loss: 23.388851\n",
+            "Epoch: 11 \tTraining Loss: 19.298960 \tValidation Loss: 22.264116\n",
+            "Validation loss decreased (22.670803 --> 22.264116).  Saving model ...\n",
+            "Epoch: 12 \tTraining Loss: 18.639930 \tValidation Loss: 22.163565\n",
+            "Validation loss decreased (22.264116 --> 22.163565).  Saving model ...\n",
+            "Epoch: 13 \tTraining Loss: 18.014738 \tValidation Loss: 22.538633\n",
+            "Epoch: 14 \tTraining Loss: 17.396607 \tValidation Loss: 23.147467\n",
+            "Epoch: 15 \tTraining Loss: 16.883292 \tValidation Loss: 22.109338\n",
+            "Validation loss decreased (22.163565 --> 22.109338).  Saving model ...\n",
+            "Epoch: 16 \tTraining Loss: 16.285820 \tValidation Loss: 21.778137\n",
+            "Validation loss decreased (22.109338 --> 21.778137).  Saving model ...\n",
+            "Epoch: 17 \tTraining Loss: 15.763043 \tValidation Loss: 22.761066\n",
+            "Epoch: 18 \tTraining Loss: 15.239761 \tValidation Loss: 23.015333\n",
+            "Epoch: 19 \tTraining Loss: 14.686504 \tValidation Loss: 24.884198\n",
+            "Epoch: 20 \tTraining Loss: 14.331884 \tValidation Loss: 23.479585\n",
+            "Epoch: 21 \tTraining Loss: 13.738545 \tValidation Loss: 23.654407\n",
+            "Epoch: 22 \tTraining Loss: 13.280116 \tValidation Loss: 24.813238\n",
+            "Epoch: 23 \tTraining Loss: 12.799162 \tValidation Loss: 24.736943\n",
+            "Epoch: 24 \tTraining Loss: 12.397551 \tValidation Loss: 25.655078\n",
+            "Epoch: 25 \tTraining Loss: 11.895732 \tValidation Loss: 26.784175\n",
+            "Epoch: 26 \tTraining Loss: 11.552095 \tValidation Loss: 26.599335\n",
+            "Epoch: 27 \tTraining Loss: 11.088106 \tValidation Loss: 27.633717\n",
+            "Epoch: 28 \tTraining Loss: 10.653680 \tValidation Loss: 27.694947\n",
+            "Epoch: 29 \tTraining Loss: 10.327371 \tValidation Loss: 27.795613\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch.optim as optim\n",
+        "\n",
+        "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+        "optimizer = optim.SGD(model.parameters(), lr=0.01)  # specify optimizer\n",
+        "\n",
+        "n_epochs = 30  # number of epochs to train the model\n",
+        "train_loss_list = []  # list to store loss to visualize\n",
+        "valid_loss_min = np.Inf  # track change in validation loss\n",
+        "\n",
+        "for epoch in range(n_epochs):\n",
+        "    # Keep track of training and validation loss\n",
+        "    train_loss = 0.0\n",
+        "    valid_loss = 0.0\n",
+        "\n",
+        "    # Train the model\n",
+        "    model.train()\n",
+        "    for data, target in train_loader:\n",
+        "        # Move tensors to GPU if CUDA is available\n",
+        "        if train_on_gpu:\n",
+        "            data, target = data.cuda(), target.cuda()\n",
+        "        # Clear the gradients of all optimized variables\n",
+        "        optimizer.zero_grad()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = model(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+        "        loss.backward()\n",
+        "        # Perform a single optimization step (parameter update)\n",
+        "        optimizer.step()\n",
+        "        # Update training loss\n",
+        "        train_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Validate the model\n",
+        "    model.eval()\n",
+        "    for data, target in valid_loader:\n",
+        "        # Move tensors to GPU if CUDA is available\n",
+        "        if train_on_gpu:\n",
+        "            data, target = data.cuda(), target.cuda()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = model(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Update average validation loss\n",
+        "        valid_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Calculate average losses\n",
+        "    train_loss = train_loss / len(train_loader)\n",
+        "    valid_loss = valid_loss / len(valid_loader)\n",
+        "    train_loss_list.append(train_loss)\n",
+        "\n",
+        "    # Print training/validation statistics\n",
+        "    print(\n",
+        "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n",
+        "            epoch, train_loss, valid_loss\n",
+        "        )\n",
+        "    )\n",
+        "\n",
+        "    # Save model if validation loss has decreased\n",
+        "    if valid_loss <= valid_loss_min:\n",
+        "        print(\n",
+        "            \"Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...\".format(\n",
+        "                valid_loss_min, valid_loss\n",
+        "            )\n",
+        "        )\n",
+        "        torch.save(model.state_dict(), \"/content/save_data/model_cifar.pt\")\n",
+        "        valid_loss_min = valid_loss"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "13e1df74",
+      "metadata": {
+        "id": "13e1df74"
+      },
+      "source": [
+        "Does overfit occur? If so, do an early stopping."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 20,
+      "id": "d39df818",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 472
+        },
+        "id": "d39df818",
+        "outputId": "8deb6712-7903-4899-9709-9be0e022e600"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "plt.plot(range(n_epochs), train_loss_list)\n",
+        "plt.xlabel(\"Epoch\")\n",
+        "plt.ylabel(\"Loss\")\n",
+        "plt.title(\"Performance of Model 1\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "11df8fd4",
+      "metadata": {
+        "id": "11df8fd4"
+      },
+      "source": [
+        "Now loading the model with the lowest validation loss value\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 21,
+      "id": "e93efdfc",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "e93efdfc",
+        "outputId": "25166486-06b8-4855-fc5d-a70f4253ee31"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-21-022d8c474780>:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+            "  model.load_state_dict(torch.load(\"/content/save_data/model_cifar.pt\"))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Test Loss: 22.087782\n",
+            "\n",
+            "Test Accuracy of airplane: 64% (640/1000)\n",
+            "Test Accuracy of automobile: 74% (741/1000)\n",
+            "Test Accuracy of  bird: 52% (528/1000)\n",
+            "Test Accuracy of   cat: 45% (455/1000)\n",
+            "Test Accuracy of  deer: 53% (539/1000)\n",
+            "Test Accuracy of   dog: 48% (482/1000)\n",
+            "Test Accuracy of  frog: 76% (766/1000)\n",
+            "Test Accuracy of horse: 69% (693/1000)\n",
+            "Test Accuracy of  ship: 72% (729/1000)\n",
+            "Test Accuracy of truck: 67% (679/1000)\n",
+            "\n",
+            "Test Accuracy (Overall): 62% (6252/10000)\n"
+          ]
+        }
+      ],
+      "source": [
+        "model.load_state_dict(torch.load(\"/content/save_data/model_cifar.pt\"))\n",
+        "\n",
+        "# track test loss\n",
+        "test_loss = 0.0\n",
+        "class_correct = list(0.0 for i in range(10))\n",
+        "class_total = list(0.0 for i in range(10))\n",
+        "\n",
+        "model.eval()\n",
+        "# iterate over test data\n",
+        "for data, target in test_loader:\n",
+        "    # move tensors to GPU if CUDA is available\n",
+        "    if train_on_gpu:\n",
+        "        data, target = data.cuda(), target.cuda()\n",
+        "    # forward pass: compute predicted outputs by passing inputs to the model\n",
+        "    output = model(data)\n",
+        "    # calculate the batch loss\n",
+        "    loss = criterion(output, target)\n",
+        "    # update test loss\n",
+        "    test_loss += loss.item() * data.size(0)\n",
+        "    # convert output probabilities to predicted class\n",
+        "    _, pred = torch.max(output, 1)\n",
+        "    # compare predictions to true label\n",
+        "    correct_tensor = pred.eq(target.data.view_as(pred))\n",
+        "    correct = (\n",
+        "        np.squeeze(correct_tensor.numpy())\n",
+        "        if not train_on_gpu\n",
+        "        else np.squeeze(correct_tensor.cpu().numpy())\n",
+        "    )\n",
+        "    # calculate test accuracy for each object class\n",
+        "    for i in range(batch_size):\n",
+        "        label = target.data[i]\n",
+        "        class_correct[label] += correct[i].item()\n",
+        "        class_total[label] += 1\n",
+        "\n",
+        "# average test loss\n",
+        "test_loss = test_loss / len(test_loader)\n",
+        "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+        "\n",
+        "for i in range(10):\n",
+        "    if class_total[i] > 0:\n",
+        "        print(\n",
+        "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+        "            % (\n",
+        "                classes[i],\n",
+        "                100 * class_correct[i] / class_total[i],\n",
+        "                np.sum(class_correct[i]),\n",
+        "                np.sum(class_total[i]),\n",
+        "            )\n",
+        "        )\n",
+        "    else:\n",
+        "        print(\"Test Accuracy of %5s: N/A (no training examples)\" % (classes[i]))\n",
+        "\n",
+        "print(\n",
+        "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+        "    % (\n",
+        "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+        "        np.sum(class_correct),\n",
+        "        np.sum(class_total),\n",
+        "    )\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "944991a2",
+      "metadata": {
+        "id": "944991a2"
+      },
+      "source": [
+        "Build a new network with the following structure.\n",
+        "\n",
+        "- It has 3 convolutional layers of kernel size 3 and padding of 1.\n",
+        "- The first convolutional layer must output 16 channels, the second 32 and the third 64.\n",
+        "- At each convolutional layer output, we apply a ReLU activation then a MaxPool with kernel size of 2.\n",
+        "- Then, three fully connected layers, the first two being followed by a ReLU activation and a dropout whose value you will suggest.\n",
+        "- The first fully connected layer will have an output size of 512.\n",
+        "- The second fully connected layer will have an output size of 64.\n",
+        "\n",
+        "Compare the results obtained with this new network to those obtained previously."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 22,
+      "id": "2DvrdR_nsGqq",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2DvrdR_nsGqq",
+        "outputId": "1648c077-ba3f-4f7a-9a33-2d5777092e10"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Net2(\n",
+            "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (fc1): Linear(in_features=1024, out_features=512, bias=True)\n",
+            "  (fc2): Linear(in_features=512, out_features=64, bias=True)\n",
+            "  (fc3): Linear(in_features=64, out_features=10, bias=True)\n",
+            "  (dropout): Dropout(p=0.5, inplace=False)\n",
+            ")\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch.nn as nn\n",
+        "import torch.nn.functional as F\n",
+        "\n",
+        "class Net2(nn.Module):  # Ensure it inherits from nn.Module\n",
+        "    def __init__(self, dropout_prob):\n",
+        "        super(Net2, self).__init__()\n",
+        "        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.pool = nn.MaxPool2d(2, 2)  # (kernel size, stride)\n",
+        "        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.fc1 = nn.Linear(64 * 4 * 4, 512)  # (input, output)\n",
+        "        self.fc2 = nn.Linear(512, 64)  # (input, output)\n",
+        "        self.fc3 = nn.Linear(64, 10)  # (input, output = number of classes)\n",
+        "\n",
+        "        # Dropout layer\n",
+        "        self.dropout = nn.Dropout(p=dropout_prob)\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        x = self.pool(F.relu(self.conv1(x)))\n",
+        "        x = self.pool(F.relu(self.conv2(x)))\n",
+        "        x = self.pool(F.relu(self.conv3(x)))\n",
+        "        x = x.view(-1, 64 * 4 * 4)\n",
+        "        x = F.relu(self.fc1(x))\n",
+        "        x = F.relu(self.fc2(x))\n",
+        "        x = self.fc3(x)\n",
+        "        x = self.dropout(x)  # Apply dropout\n",
+        "        return x\n",
+        "\n",
+        "# Create a complete CNN\n",
+        "model2 = Net2(dropout_prob=0.5)  # Pass dropout probability when creating the model\n",
+        "print(model2)\n",
+        "\n",
+        "# Move tensors to GPU if CUDA is available\n",
+        "train_on_gpu = torch.cuda.is_available()\n",
+        "if train_on_gpu:\n",
+        "    model2.cuda()\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 23,
+      "id": "IJz2Q9T25Qc3",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "IJz2Q9T25Qc3",
+        "outputId": "121b9032-8481-4be3-e542-2ece8d8dbe2a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch: 0 \tTraining Loss: 45.904219 \tValidation Loss: 44.942664\n",
+            "Validation loss decreased (inf --> 44.942664).  Saving model ...\n",
+            "Epoch: 1 \tTraining Loss: 42.813407 \tValidation Loss: 38.436903\n",
+            "Validation loss decreased (44.942664 --> 38.436903).  Saving model ...\n",
+            "Epoch: 2 \tTraining Loss: 39.437505 \tValidation Loss: 34.759394\n",
+            "Validation loss decreased (38.436903 --> 34.759394).  Saving model ...\n",
+            "Epoch: 3 \tTraining Loss: 37.662391 \tValidation Loss: 32.064202\n",
+            "Validation loss decreased (34.759394 --> 32.064202).  Saving model ...\n",
+            "Epoch: 4 \tTraining Loss: 36.459317 \tValidation Loss: 31.174973\n",
+            "Validation loss decreased (32.064202 --> 31.174973).  Saving model ...\n",
+            "Epoch: 5 \tTraining Loss: 35.285123 \tValidation Loss: 28.916658\n",
+            "Validation loss decreased (31.174973 --> 28.916658).  Saving model ...\n",
+            "Epoch: 6 \tTraining Loss: 34.439550 \tValidation Loss: 27.563518\n",
+            "Validation loss decreased (28.916658 --> 27.563518).  Saving model ...\n",
+            "Epoch: 7 \tTraining Loss: 33.610940 \tValidation Loss: 26.579835\n",
+            "Validation loss decreased (27.563518 --> 26.579835).  Saving model ...\n",
+            "Epoch: 8 \tTraining Loss: 32.717580 \tValidation Loss: 25.121927\n",
+            "Validation loss decreased (26.579835 --> 25.121927).  Saving model ...\n",
+            "Epoch: 9 \tTraining Loss: 31.905730 \tValidation Loss: 24.054429\n",
+            "Validation loss decreased (25.121927 --> 24.054429).  Saving model ...\n",
+            "Epoch: 10 \tTraining Loss: 31.197301 \tValidation Loss: 22.744985\n",
+            "Validation loss decreased (24.054429 --> 22.744985).  Saving model ...\n",
+            "Epoch: 11 \tTraining Loss: 30.428635 \tValidation Loss: 22.388091\n",
+            "Validation loss decreased (22.744985 --> 22.388091).  Saving model ...\n",
+            "Epoch: 12 \tTraining Loss: 29.790207 \tValidation Loss: 21.463255\n",
+            "Validation loss decreased (22.388091 --> 21.463255).  Saving model ...\n",
+            "Epoch: 13 \tTraining Loss: 29.057292 \tValidation Loss: 20.476804\n",
+            "Validation loss decreased (21.463255 --> 20.476804).  Saving model ...\n",
+            "Epoch: 14 \tTraining Loss: 28.433569 \tValidation Loss: 20.292996\n",
+            "Validation loss decreased (20.476804 --> 20.292996).  Saving model ...\n",
+            "Epoch: 15 \tTraining Loss: 27.846333 \tValidation Loss: 19.485786\n",
+            "Validation loss decreased (20.292996 --> 19.485786).  Saving model ...\n",
+            "Epoch: 16 \tTraining Loss: 27.332442 \tValidation Loss: 19.583211\n",
+            "Epoch: 17 \tTraining Loss: 26.768378 \tValidation Loss: 18.470492\n",
+            "Validation loss decreased (19.485786 --> 18.470492).  Saving model ...\n",
+            "Epoch: 18 \tTraining Loss: 26.166088 \tValidation Loss: 18.522611\n",
+            "Epoch: 19 \tTraining Loss: 25.595460 \tValidation Loss: 17.577921\n",
+            "Validation loss decreased (18.470492 --> 17.577921).  Saving model ...\n",
+            "Epoch: 20 \tTraining Loss: 25.090311 \tValidation Loss: 17.772856\n",
+            "Epoch: 21 \tTraining Loss: 24.457410 \tValidation Loss: 17.996507\n",
+            "Epoch: 22 \tTraining Loss: 24.041111 \tValidation Loss: 17.588359\n",
+            "Epoch: 23 \tTraining Loss: 23.373427 \tValidation Loss: 17.087331\n",
+            "Validation loss decreased (17.577921 --> 17.087331).  Saving model ...\n",
+            "Epoch: 24 \tTraining Loss: 22.925357 \tValidation Loss: 17.181599\n",
+            "Epoch: 25 \tTraining Loss: 22.313664 \tValidation Loss: 17.449139\n",
+            "Epoch: 26 \tTraining Loss: 21.707366 \tValidation Loss: 17.998827\n",
+            "Epoch: 27 \tTraining Loss: 21.505335 \tValidation Loss: 17.853451\n",
+            "Epoch: 28 \tTraining Loss: 20.992254 \tValidation Loss: 17.821120\n",
+            "Epoch: 29 \tTraining Loss: 20.592966 \tValidation Loss: 19.540560\n"
+          ]
+        }
+      ],
+      "source": [
+        "import torch.optim as optim\n",
+        "\n",
+        "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+        "optimizer = optim.SGD(model2.parameters(), lr=0.01)  # specify optimizer\n",
+        "\n",
+        "n_epochs = 30  # number of epochs to train the model\n",
+        "train_loss_list = []  # list to store loss to visualize\n",
+        "valid_loss_min = np.Inf  # track change in validation loss\n",
+        "\n",
+        "for epoch in range(n_epochs):\n",
+        "    # Keep track of training and validation loss\n",
+        "    train_loss = 0.0\n",
+        "    valid_loss = 0.0\n",
+        "\n",
+        "    # Train the model\n",
+        "    model2.train()\n",
+        "    for data, target in train_loader:\n",
+        "        # Move tensors to GPU if CUDA is available\n",
+        "        if train_on_gpu:\n",
+        "            data, target = data.cuda(), target.cuda()\n",
+        "        # Clear the gradients of all optimized variables\n",
+        "        optimizer.zero_grad()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = model2(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+        "        loss.backward()\n",
+        "        # Perform a single optimization step (parameter update)\n",
+        "        optimizer.step()\n",
+        "        # Update training loss\n",
+        "        train_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Validate the model\n",
+        "    model2.eval()\n",
+        "    for data, target in valid_loader:\n",
+        "        # Move tensors to GPU if CUDA is available\n",
+        "        if train_on_gpu:\n",
+        "            data, target = data.cuda(), target.cuda()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = model2(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Update average validation loss\n",
+        "        valid_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Calculate average losses\n",
+        "    train_loss = train_loss / len(train_loader)\n",
+        "    valid_loss = valid_loss / len(valid_loader)\n",
+        "    train_loss_list.append(train_loss)\n",
+        "\n",
+        "    # Print training/validation statistics\n",
+        "    print(\n",
+        "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n",
+        "            epoch, train_loss, valid_loss\n",
+        "        )\n",
+        "    )\n",
+        "\n",
+        "    # Save model if validation loss has decreased\n",
+        "    if valid_loss <= valid_loss_min:\n",
+        "        print(\n",
+        "            \"Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...\".format(\n",
+        "                valid_loss_min, valid_loss\n",
+        "            )\n",
+        "        )\n",
+        "        torch.save(model2.state_dict(), \"/content/save_data/model2_cifar.pt\")\n",
+        "        valid_loss_min = valid_loss"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "hNYf38f_sSfo",
+      "metadata": {
+        "id": "hNYf38f_sSfo"
+      },
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "id": "aQVARMhv7y1b",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 472
+        },
+        "id": "aQVARMhv7y1b",
+        "outputId": "ba89cc31-b12e-4ada-f750-e52d31b3bece"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "plt.plot(range(n_epochs), train_loss_list)\n",
+        "plt.xlabel(\"Epoch\")\n",
+        "plt.ylabel(\"Loss\")\n",
+        "plt.title(\"Performance of Model 2\")\n",
+        "plt.show()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 25,
+      "id": "06j_Dr6475Kb",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "06j_Dr6475Kb",
+        "outputId": "a359313d-70bb-43e0-918c-309684e43037"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "<ipython-input-25-ee85c886ffd9>:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+            "  model2.load_state_dict(torch.load(\"/content/save_data/model2_cifar.pt\"))\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Test Loss: 17.172099\n",
+            "\n",
+            "Test Accuracy of airplane: 76% (765/1000)\n",
+            "Test Accuracy of automobile: 77% (776/1000)\n",
+            "Test Accuracy of  bird: 58% (588/1000)\n",
+            "Test Accuracy of   cat: 57% (573/1000)\n",
+            "Test Accuracy of  deer: 65% (650/1000)\n",
+            "Test Accuracy of   dog: 53% (530/1000)\n",
+            "Test Accuracy of  frog: 78% (783/1000)\n",
+            "Test Accuracy of horse: 78% (782/1000)\n",
+            "Test Accuracy of  ship: 82% (828/1000)\n",
+            "Test Accuracy of truck: 79% (790/1000)\n",
+            "\n",
+            "Test Accuracy (Overall): 70% (7065/10000)\n"
+          ]
+        }
+      ],
+      "source": [
+        "model2.load_state_dict(torch.load(\"/content/save_data/model2_cifar.pt\"))\n",
+        "\n",
+        "# track test loss\n",
+        "test_loss = 0.0\n",
+        "class_correct = list(0.0 for i in range(10))\n",
+        "class_total = list(0.0 for i in range(10))\n",
+        "\n",
+        "model2.eval()\n",
+        "# iterate over test data\n",
+        "for data, target in test_loader:\n",
+        "    # move tensors to GPU if CUDA is available\n",
+        "    if train_on_gpu:\n",
+        "        data, target = data.cuda(), target.cuda()\n",
+        "    # forward pass: compute predicted outputs by passing inputs to the model\n",
+        "    output = model2(data)\n",
+        "    # calculate the batch loss\n",
+        "    loss = criterion(output, target)\n",
+        "    # update test loss\n",
+        "    test_loss += loss.item() * data.size(0)\n",
+        "    # convert output probabilities to predicted class\n",
+        "    _, pred = torch.max(output, 1)\n",
+        "    # compare predictions to true label\n",
+        "    correct_tensor = pred.eq(target.data.view_as(pred))\n",
+        "    correct = (\n",
+        "        np.squeeze(correct_tensor.numpy())\n",
+        "        if not train_on_gpu\n",
+        "        else np.squeeze(correct_tensor.cpu().numpy())\n",
+        "    )\n",
+        "    # calculate test accuracy for each object class\n",
+        "    for i in range(batch_size):\n",
+        "        label = target.data[i]\n",
+        "        class_correct[label] += correct[i].item()\n",
+        "        class_total[label] += 1\n",
+        "\n",
+        "# average test loss\n",
+        "test_loss = test_loss / len(test_loader)\n",
+        "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+        "\n",
+        "for i in range(10):\n",
+        "    if class_total[i] > 0:\n",
+        "        print(\n",
+        "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+        "            % (\n",
+        "                classes[i],\n",
+        "                100 * class_correct[i] / class_total[i],\n",
+        "                np.sum(class_correct[i]),\n",
+        "                np.sum(class_total[i]),\n",
+        "            )\n",
+        "        )\n",
+        "    else:\n",
+        "        print(\"Test Accuracy of %5s: N/A (no training examples)\" % (classes[i]))\n",
+        "\n",
+        "print(\n",
+        "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+        "    % (\n",
+        "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+        "        np.sum(class_correct),\n",
+        "        np.sum(class_total),\n",
+        "    )\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "bc381cf4",
+      "metadata": {
+        "id": "bc381cf4"
+      },
+      "source": [
+        "## Exercise 2: Quantization: try to compress the CNN to save space\n",
+        "\n",
+        "Quantization doc is available from https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic\n",
+        "        \n",
+        "The Exercise is to quantize post training the above CNN model. Compare the size reduction and the impact on the classification accuracy\n",
+        "\n",
+        "\n",
+        "The size of the model is simply the size of the file."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 27,
+      "id": "ef623c26",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ef623c26",
+        "outputId": "b12fc3df-3534-4c11-bd78-01a56839bbb7"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "model:  fp32  \t Size (KB): 251.342\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "251342"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 27
+        }
+      ],
+      "source": [
+        "import os\n",
+        "\n",
+        "\n",
+        "def print_size_of_model(model, label=\"\"):\n",
+        "    torch.save(model.state_dict(), \"temp.p\")\n",
+        "    size = os.path.getsize(\"temp.p\")\n",
+        "    print(\"model: \", label, \" \\t\", \"Size (KB):\", size / 1e3)\n",
+        "    os.remove(\"temp.p\")\n",
+        "    return size\n",
+        "\n",
+        "\n",
+        "print_size_of_model(model, \"fp32\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "05c4e9ad",
+      "metadata": {
+        "id": "05c4e9ad"
+      },
+      "source": [
+        "Post training quantization example"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch.quantization\n",
+        "\n",
+        "model2.to(\"cpu\")\n",
+        "quantized_model = torch.quantization.quantize_dynamic(model2, dtype=torch.qint8)\n",
+        "print_size_of_model(model2, \"fp32\")\n",
+        "print_size_of_model(quantized_model, \"int8\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "38_XJKcrX5Lq",
+        "outputId": "0482c566-6374-46a1-ef5d-4988bd94002d"
+      },
+      "id": "38_XJKcrX5Lq",
+      "execution_count": 28,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "model:  fp32  \t Size (KB): 2330.946\n",
+            "model:  int8  \t Size (KB): 659.806\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "659806"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 28
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The size of the model reduces by more than 3 thanks to the dynamic quantization."
+      ],
+      "metadata": {
+        "id": "mCL0R6GGatxw"
+      },
+      "id": "mCL0R6GGatxw"
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7b108e17",
+      "metadata": {
+        "id": "7b108e17"
+      },
+      "source": [
+        "For each class, compare the classification test accuracy of the initial model and the quantized model. Also give the overall test accuracy for both models."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 29,
+      "id": "ZXeLJC39QjOP",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ZXeLJC39QjOP",
+        "outputId": "8d7fdd63-6a5f-4289-931c-e3fa874288e2"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Net2(\n",
+            "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+            "  (fc1): DynamicQuantizedLinear(in_features=1024, out_features=512, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
+            "  (fc2): DynamicQuantizedLinear(in_features=512, out_features=64, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
+            "  (fc3): DynamicQuantizedLinear(in_features=64, out_features=10, dtype=torch.qint8, qscheme=torch.per_tensor_affine)\n",
+            "  (dropout): Dropout(p=0.5, inplace=False)\n",
+            ")\n",
+            "Test Loss: 17.187755\n",
+            "\n",
+            "Test Accuracy of airplane: 76% (765/1000)\n",
+            "Test Accuracy of automobile: 77% (775/1000)\n",
+            "Test Accuracy of  bird: 58% (585/1000)\n",
+            "Test Accuracy of   cat: 57% (572/1000)\n",
+            "Test Accuracy of  deer: 65% (653/1000)\n",
+            "Test Accuracy of   dog: 52% (527/1000)\n",
+            "Test Accuracy of  frog: 78% (785/1000)\n",
+            "Test Accuracy of horse: 78% (782/1000)\n",
+            "Test Accuracy of  ship: 82% (829/1000)\n",
+            "Test Accuracy of truck: 79% (790/1000)\n",
+            "\n",
+            "Test Accuracy (Overall): 70% (7063/10000)\n"
+          ]
+        }
+      ],
+      "source": [
+        "#try with CPU dynamic quantization --> need to convert GPU to CPU device\n",
+        "\n",
+        "# track test loss\n",
+        "test_loss = 0.0\n",
+        "class_correct = list(0.0 for i in range(10))\n",
+        "class_total = list(0.0 for i in range(10))\n",
+        "\n",
+        "quantized_model.eval()\n",
+        "print(quantized_model)\n",
+        "\n",
+        "# iterate over test data\n",
+        "for data, target in test_loader:\n",
+        "    # move tensors to GPU if CUDA is available\n",
+        "    data, target = data.cpu(), target.cpu()\n",
+        "\n",
+        "    #print(data.device, target.device, next(quantized_model.parameters()).device)\n",
+        "    # forward pass: compute predicted outputs by passing inputs to the model\n",
+        "    with torch.no_grad() :\n",
+        "      output = quantized_model(data)\n",
+        "    # calculate the batch loss\n",
+        "    loss = criterion(output, target)\n",
+        "    # update test loss\n",
+        "    test_loss += loss.item() * data.size(0)\n",
+        "    # convert output probabilities to predicted class\n",
+        "    _, pred = torch.max(output, 1)\n",
+        "    # compare predictions to true label\n",
+        "    correct_tensor = pred.eq(target.data.view_as(pred))\n",
+        "    correct = (\n",
+        "        np.squeeze(correct_tensor.cpu().numpy())\n",
+        "\n",
+        "    )\n",
+        "    # calculate test accuracy for each object class\n",
+        "    for i in range(batch_size):\n",
+        "        label = target.data[i]\n",
+        "        class_correct[label] += correct[i].item()\n",
+        "        class_total[label] += 1\n",
+        "\n",
+        "# average test loss\n",
+        "test_loss = test_loss / len(test_loader)\n",
+        "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+        "\n",
+        "for i in range(10):\n",
+        "    if class_total[i] > 0:\n",
+        "        print(\n",
+        "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+        "            % (\n",
+        "                classes[i],\n",
+        "                100 * class_correct[i] / class_total[i],\n",
+        "                np.sum(class_correct[i]),\n",
+        "                np.sum(class_total[i]),\n",
+        "            )\n",
+        "        )\n",
+        "    else:\n",
+        "        print(\"Test Accuracy of %5s: N/A (no training examples)\" % (classes[i]))\n",
+        "\n",
+        "print(\n",
+        "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+        "    % (\n",
+        "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+        "        np.sum(class_correct),\n",
+        "        np.sum(class_total),\n",
+        "    )\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Answer** -->The overall test accuracies for the dense model and the quantized model are very close (respectfully 70% and 70,63%). The class accuracies are  equal for both models, except for dog, where the quantized model has a reduced accuracy of 1%. Looking at the gain by reducing the size of the model, we can consider than it is more interesting to use the quantized model in this case."
+      ],
+      "metadata": {
+        "id": "NIb1ZktXa6LL"
+      },
+      "id": "NIb1ZktXa6LL"
+    },
+    {
+      "cell_type": "markdown",
+      "id": "a0a34b90",
+      "metadata": {
+        "id": "a0a34b90"
+      },
+      "source": [
+        "Try training aware quantization to mitigate the impact on the accuracy (doc available here https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic)\n",
+        "\n",
+        "To do so, we first have to define a new neural network that will support theaware quantization."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#Applying QAT (Quantized Aware Training)\n",
+        "\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torch.quantization\n",
+        "\n",
+        "# Example model\n",
+        "class QatNet(nn.Module):\n",
+        "  def __init__(self, dropout_prob):\n",
+        "        super(QatNet, self).__init__()\n",
+        "        self.quant = torch.ao.quantization.QuantStub()\n",
+        "        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.pool = nn.MaxPool2d(2, 2)  # (kernel size, stride)\n",
+        "        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)  # (input channels, output channels, kernel size 3x3)\n",
+        "        self.fc1 = nn.Linear(64 * 4 * 4, 512)  # (input, output)\n",
+        "        self.fc2 = nn.Linear(512, 64)  # (input, output)\n",
+        "        self.fc3 = nn.Linear(64, 10)  # (input, output = number of classes)\n",
+        "\n",
+        "        # Dropout layer\n",
+        "        self.dropout = nn.Dropout(p=dropout_prob)\n",
+        "\n",
+        "  def forward(self, x):\n",
+        "      x = self.pool(F.relu(self.conv1(x)))\n",
+        "      x = self.pool(F.relu(self.conv2(x)))\n",
+        "      x = self.pool(F.relu(self.conv3(x)))\n",
+        "      x = x.view(-1, 64 * 4 * 4)\n",
+        "      x = F.relu(self.fc1(x))\n",
+        "      x = F.relu(self.fc2(x))\n",
+        "      x = self.fc3(x)\n",
+        "      x = self.dropout(x)  # Apply dropout\n",
+        "      return x\n",
+        "\n",
+        "# Instantiate the model\n",
+        "qat_model = QatNet(dropout_prob = 0.5)\n",
+        "\n",
+        "# Set the model's qconfig\n",
+        "qat_model.qconfig = torch.quantization.get_default_qat_qconfig(\"fbgemm\")\n",
+        "\n",
+        "# Prepare the model for QAT\n",
+        "torch.quantization.prepare_qat(qat_model, inplace=True)\n",
+        "\n",
+        "print(qat_model)\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ylrc7tZaKWd4",
+        "outputId": "75edab52-1360-46f4-eb1f-f50d4e928044"
+      },
+      "id": "ylrc7tZaKWd4",
+      "execution_count": 30,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "QatNet(\n",
+            "  (quant): QuantStub(\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (conv1): Conv2d(\n",
+            "    3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  (conv2): Conv2d(\n",
+            "    16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (conv3): Conv2d(\n",
+            "    32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (fc1): Linear(\n",
+            "    in_features=1024, out_features=512, bias=True\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (fc2): Linear(\n",
+            "    in_features=512, out_features=64, bias=True\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (fc3): Linear(\n",
+            "    in_features=64, out_features=10, bias=True\n",
+            "    (weight_fake_quant): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False\n",
+            "      (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))\n",
+            "    )\n",
+            "    (activation_post_process): FusedMovingAvgObsFakeQuantize(\n",
+            "      fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_tensor_affine, reduce_range=True\n",
+            "      (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf)\n",
+            "    )\n",
+            "  )\n",
+            "  (dropout): Dropout(p=0.5, inplace=False)\n",
+            ")\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torch/ao/quantization/observer.py:229: UserWarning: Please use quant_min and quant_max to specify the range for observers.                     reduce_range will be deprecated in a future release of PyTorch.\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Training the QAT Model"
+      ],
+      "metadata": {
+        "id": "WCnevnUuRxY9"
+      },
+      "id": "WCnevnUuRxY9"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Example training dataset\n",
+        "import torch.optim as optim\n",
+        "import numpy as np\n",
+        "\n",
+        "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+        "optimizer = optim.SGD(qat_model.parameters(), lr=0.01)  # specify optimizer\n",
+        "\n",
+        "n_epochs = 30  # number of epochs to train the model\n",
+        "train_loss_list = []  # list to store loss to visualize\n",
+        "valid_loss_min = np.Inf  # track change in validation loss\n",
+        "\n",
+        "qat_model = qat_model.cuda()\n",
+        "\n",
+        "for epoch in range(n_epochs):\n",
+        "    # Keep track of training and validation loss\n",
+        "    train_loss = 0.0\n",
+        "    valid_loss = 0.0\n",
+        "\n",
+        "    # Train the model\n",
+        "    qat_model.train()\n",
+        "    for data, target in train_loader:\n",
+        "      if train_on_gpu :\n",
+        "        data, target = data.cuda(), target.cuda()\n",
+        "        # Clear the gradients of all optimized variables\n",
+        "        optimizer.zero_grad()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = qat_model(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+        "        loss.backward()\n",
+        "        # Perform a single optimization step (parameter update)\n",
+        "        optimizer.step()\n",
+        "        # Update training loss\n",
+        "        train_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Validate the model\n",
+        "    qat_model.eval()\n",
+        "    for data, target in valid_loader:\n",
+        "      if train_on_gpu :\n",
+        "        # Move tensors to GPU if CUDA is available\n",
+        "        data, target = data.cuda(), target.cuda()\n",
+        "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+        "        output = qat_model(data)\n",
+        "        # Calculate the batch loss\n",
+        "        loss = criterion(output, target)\n",
+        "        # Update average validation loss\n",
+        "        valid_loss += loss.item() * data.size(0)\n",
+        "\n",
+        "    # Calculate average losses\n",
+        "    train_loss = train_loss / len(train_loader)\n",
+        "    valid_loss = valid_loss / len(valid_loader)\n",
+        "    train_loss_list.append(train_loss)\n",
+        "\n",
+        "    # Print training/validation statistics\n",
+        "    print(\n",
+        "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n",
+        "            epoch, train_loss, valid_loss\n",
+        "        )\n",
+        "    )\n",
+        "\n",
+        "    # Save model if validation loss has decreased\n",
+        "    if valid_loss <= valid_loss_min:\n",
+        "        print(\n",
+        "            \"Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...\".format(\n",
+        "                valid_loss_min, valid_loss\n",
+        "            )\n",
+        "        )\n",
+        "        torch.save(qat_model.state_dict(), \"/content/save_data/qat_model_cifar.pt\")\n",
+        "        valid_loss_min = valid_loss\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uG_1vRQJRvkd",
+        "outputId": "1cf5a022-aeff-485b-b7c9-47cf52879034"
+      },
+      "id": "uG_1vRQJRvkd",
+      "execution_count": 31,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch: 0 \tTraining Loss: 45.607654 \tValidation Loss: 43.456702\n",
+            "Validation loss decreased (inf --> 43.456702).  Saving model ...\n",
+            "Epoch: 1 \tTraining Loss: 41.996771 \tValidation Loss: 37.539250\n",
+            "Validation loss decreased (43.456702 --> 37.539250).  Saving model ...\n",
+            "Epoch: 2 \tTraining Loss: 39.226181 \tValidation Loss: 34.510424\n",
+            "Validation loss decreased (37.539250 --> 34.510424).  Saving model ...\n",
+            "Epoch: 3 \tTraining Loss: 37.682786 \tValidation Loss: 32.866509\n",
+            "Validation loss decreased (34.510424 --> 32.866509).  Saving model ...\n",
+            "Epoch: 4 \tTraining Loss: 36.638718 \tValidation Loss: 30.486939\n",
+            "Validation loss decreased (32.866509 --> 30.486939).  Saving model ...\n",
+            "Epoch: 5 \tTraining Loss: 35.586891 \tValidation Loss: 28.771608\n",
+            "Validation loss decreased (30.486939 --> 28.771608).  Saving model ...\n",
+            "Epoch: 6 \tTraining Loss: 34.692187 \tValidation Loss: 27.582011\n",
+            "Validation loss decreased (28.771608 --> 27.582011).  Saving model ...\n",
+            "Epoch: 7 \tTraining Loss: 33.680828 \tValidation Loss: 25.607441\n",
+            "Validation loss decreased (27.582011 --> 25.607441).  Saving model ...\n",
+            "Epoch: 8 \tTraining Loss: 32.750430 \tValidation Loss: 25.514199\n",
+            "Validation loss decreased (25.607441 --> 25.514199).  Saving model ...\n",
+            "Epoch: 9 \tTraining Loss: 31.976220 \tValidation Loss: 24.268018\n",
+            "Validation loss decreased (25.514199 --> 24.268018).  Saving model ...\n",
+            "Epoch: 10 \tTraining Loss: 31.123835 \tValidation Loss: 22.676677\n",
+            "Validation loss decreased (24.268018 --> 22.676677).  Saving model ...\n",
+            "Epoch: 11 \tTraining Loss: 30.569647 \tValidation Loss: 22.446432\n",
+            "Validation loss decreased (22.676677 --> 22.446432).  Saving model ...\n",
+            "Epoch: 12 \tTraining Loss: 29.750041 \tValidation Loss: 20.961087\n",
+            "Validation loss decreased (22.446432 --> 20.961087).  Saving model ...\n",
+            "Epoch: 13 \tTraining Loss: 29.224235 \tValidation Loss: 20.581028\n",
+            "Validation loss decreased (20.961087 --> 20.581028).  Saving model ...\n",
+            "Epoch: 14 \tTraining Loss: 28.792308 \tValidation Loss: 21.056585\n",
+            "Epoch: 15 \tTraining Loss: 27.970131 \tValidation Loss: 19.669330\n",
+            "Validation loss decreased (20.581028 --> 19.669330).  Saving model ...\n",
+            "Epoch: 16 \tTraining Loss: 27.409082 \tValidation Loss: 18.849576\n",
+            "Validation loss decreased (19.669330 --> 18.849576).  Saving model ...\n",
+            "Epoch: 17 \tTraining Loss: 26.906203 \tValidation Loss: 18.456558\n",
+            "Validation loss decreased (18.849576 --> 18.456558).  Saving model ...\n",
+            "Epoch: 18 \tTraining Loss: 26.369765 \tValidation Loss: 18.260281\n",
+            "Validation loss decreased (18.456558 --> 18.260281).  Saving model ...\n",
+            "Epoch: 19 \tTraining Loss: 25.892071 \tValidation Loss: 18.561713\n",
+            "Epoch: 20 \tTraining Loss: 25.191699 \tValidation Loss: 17.584603\n",
+            "Validation loss decreased (18.260281 --> 17.584603).  Saving model ...\n",
+            "Epoch: 21 \tTraining Loss: 24.614829 \tValidation Loss: 18.334314\n",
+            "Epoch: 22 \tTraining Loss: 24.426494 \tValidation Loss: 17.345409\n",
+            "Validation loss decreased (17.584603 --> 17.345409).  Saving model ...\n",
+            "Epoch: 23 \tTraining Loss: 23.477772 \tValidation Loss: 17.319574\n",
+            "Validation loss decreased (17.345409 --> 17.319574).  Saving model ...\n",
+            "Epoch: 24 \tTraining Loss: 22.992848 \tValidation Loss: 17.724551\n",
+            "Epoch: 25 \tTraining Loss: 22.607852 \tValidation Loss: 17.718065\n",
+            "Epoch: 26 \tTraining Loss: 22.302885 \tValidation Loss: 17.835953\n",
+            "Epoch: 27 \tTraining Loss: 21.773108 \tValidation Loss: 18.432680\n",
+            "Epoch: 28 \tTraining Loss: 21.180365 \tValidation Loss: 18.724174\n",
+            "Epoch: 29 \tTraining Loss: 20.909504 \tValidation Loss: 18.800871\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "plt.plot(range(n_epochs), train_loss_list)\n",
+        "plt.xlabel(\"Epoch\")\n",
+        "plt.ylabel(\"Loss\")\n",
+        "plt.title(\"Performance of QAT Model\")\n",
+        "plt.show()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 472
+        },
+        "id": "tYqC1SIM7ap1",
+        "outputId": "3bc958ec-20c5-462b-ed56-284a5f6b1ce0"
+      },
+      "id": "tYqC1SIM7ap1",
+      "execution_count": 32,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "qat_model.to(\"cpu\")\n",
+        "aware_quantized_model = torch.quantization.convert(qat_model.eval(), inplace=False)\n",
+        "#print(aware_quantized_model)\n",
+        "print_size_of_model(qat_model)\n",
+        "print_size_of_model(aware_quantized_model)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "2PPXYaQLUHiS",
+        "outputId": "17c948d9-5464-4e80-ae3f-45de3a740195"
+      },
+      "id": "2PPXYaQLUHiS",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "model:    \t Size (KB): 2370.275\n",
+            "model:    \t Size (KB): 605.094\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "605094"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 38
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Afficher le device du modèle quantifié\n",
+        "params = list(aware_quantized_model.parameters())\n",
+        "print(params)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Ssfsol07HXoO",
+        "outputId": "8c4c5739-d43f-4052-dd38-8ea74135646b"
+      },
+      "id": "Ssfsol07HXoO",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Test QAT Model"
+      ],
+      "metadata": {
+        "id": "JJlHMbBZUMhd"
+      },
+      "id": "JJlHMbBZUMhd"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#try with CPU dynamic quantization --> need to convert GPU to CPU device\n",
+        "\n",
+        "# track test loss\n",
+        "test_loss = 0.0\n",
+        "class_correct = list(0.0 for i in range(10))\n",
+        "class_total = list(0.0 for i in range(10))\n",
+        "\n",
+        "aware_quantized_model.to(\"cpu\")\n",
+        "aware_quantized_model.eval()\n",
+        "print(aware_quantized_model)\n",
+        "\n",
+        "# iterate over test data\n",
+        "for data, target in test_loader:\n",
+        "    # move tensors to CPU\n",
+        "    data, target = data.cpu(), target.cpu()\n",
+        "    # forward pass: compute predicted outputs by passing inputs to the model\n",
+        "    with torch.no_grad() :\n",
+        "      output = aware_quantized_model(data)\n",
+        "    # calculate the batch loss\n",
+        "    loss = criterion(output, target)\n",
+        "    # update test loss\n",
+        "    test_loss += loss.item() * data.size(0)\n",
+        "    # convert output probabilities to predicted class\n",
+        "    _, pred = torch.max(output, 1)\n",
+        "    # compare predictions to true label\n",
+        "    correct_tensor = pred.eq(target.data.view_as(pred))\n",
+        "    correct = (\n",
+        "        np.squeeze(correct_tensor.cpu().numpy())\n",
+        "\n",
+        "    )\n",
+        "    # calculate test accuracy for each object class\n",
+        "    for i in range(batch_size):\n",
+        "        label = target.data[i]\n",
+        "        class_correct[label] += correct[i].item()\n",
+        "        class_total[label] += 1\n",
+        "\n",
+        "# average test loss\n",
+        "test_loss = test_loss / len(test_loader)\n",
+        "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+        "\n",
+        "for i in range(10):\n",
+        "    if class_total[i] > 0:\n",
+        "        print(\n",
+        "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+        "            % (\n",
+        "                classes[i],\n",
+        "                100 * class_correct[i] / class_total[i],\n",
+        "                np.sum(class_correct[i]),\n",
+        "                np.sum(class_total[i]),\n",
+        "            )\n",
+        "        )\n",
+        "    else:\n",
+        "        print(\"Test Accuracy of %5s: N/A (no training examples)\" % (classes[i]))\n",
+        "\n",
+        "print(\n",
+        "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+        "    % (\n",
+        "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+        "        np.sum(class_correct),\n",
+        "        np.sum(class_total),\n",
+        "    )\n",
+        ")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "aYDwEmQzUPL5",
+        "outputId": "7b477a0b-9d86-4e0d-dd12-7b92938c10ff"
+      },
+      "id": "aYDwEmQzUPL5",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "QatNet(\n",
+            "  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)\n",
+            "  (conv1): QuantizedConv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), scale=0.0678861141204834, zero_point=62, padding=(1, 1))\n",
+            "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+            "  (conv2): QuantizedConv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.0762503445148468, zero_point=47, padding=(1, 1))\n",
+            "  (conv3): QuantizedConv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.10137047618627548, zero_point=69, padding=(1, 1))\n",
+            "  (fc1): QuantizedLinear(in_features=1024, out_features=512, scale=0.06663341820240021, zero_point=56, qscheme=torch.per_channel_affine)\n",
+            "  (fc2): QuantizedLinear(in_features=512, out_features=64, scale=0.05890081077814102, zero_point=51, qscheme=torch.per_channel_affine)\n",
+            "  (fc3): QuantizedLinear(in_features=64, out_features=10, scale=0.06607885658740997, zero_point=76, qscheme=torch.per_channel_affine)\n",
+            "  (dropout): QuantizedDropout(p=0.5, inplace=False)\n",
+            ")\n"
+          ]
+        },
+        {
+          "output_type": "error",
+          "ename": "NotImplementedError",
+          "evalue": "Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d.new' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].\n\nMeta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]\nQuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]\nQuantizedCUDA: registered at ../aten/src/ATen/native/quantized/cudnn/Conv.cpp:391 [kernel]\nBackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]\nPython: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]\nFuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]\nFunctionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]\nNamed: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]\nConjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]\nNegative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]\nZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]\nADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]\nAutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]\nAutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]\nAutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]\nAutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]\nAutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]\nAutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]\nAutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]\nAutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]\nAutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]\nTracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]\nAutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]\nAutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]\nAutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]\nAutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]\nFuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]\nBatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]\nFuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]\nBatched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]\nVmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]\nFuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]\nPythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]\nFuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]\nPreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]\nPythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]\n",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mNotImplementedError\u001b[0m                       Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-39-edca34e920f8>\u001b[0m in \u001b[0;36m<cell line: 13>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     16\u001b[0m     \u001b[0;31m# forward pass: compute predicted outputs by passing inputs to the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m       \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maware_quantized_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     19\u001b[0m     \u001b[0;31m# calculate the batch loss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m     \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1734\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1735\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1736\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1737\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1738\u001b[0m     \u001b[0;31m# torchrec tests the code consistency with the following code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1745\u001b[0m                 \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1746\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1748\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1749\u001b[0m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m<ipython-input-31-736a5f5ba3fe>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m     22\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m       \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     25\u001b[0m       \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m       \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpool\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv3\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1734\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_compiled_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# type: ignore[misc]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1735\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1736\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call_impl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1737\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1738\u001b[0m     \u001b[0;31m# torchrec tests the code consistency with the following code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1745\u001b[0m                 \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1746\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1748\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1749\u001b[0m         \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/ao/nn/quantized/modules/conv.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    593\u001b[0m                 \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_reversed_padding_repeated_twice\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpadding_mode\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    594\u001b[0m             )\n\u001b[0;32m--> 595\u001b[0;31m         return ops.quantized.conv2d(\n\u001b[0m\u001b[1;32m    596\u001b[0m             \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_packed_params\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzero_point\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    597\u001b[0m         )\n",
+            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/_ops.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1114\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_has_torchbind_op_overload\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0m_must_dispatch_in_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1115\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0m_call_overload_packet_from_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1116\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1118\u001b[0m     \u001b[0;31m# TODO: use this to make a __dir__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mNotImplementedError\u001b[0m: Could not run 'quantized::conv2d.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d.new' is only available for these backends: [Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].\n\nMeta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]\nQuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:1972 [kernel]\nQuantizedCUDA: registered at ../aten/src/ATen/native/quantized/cudnn/Conv.cpp:391 [kernel]\nBackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]\nPython: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]\nFuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]\nFunctionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]\nNamed: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]\nConjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]\nNegative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]\nZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]\nADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]\nAutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]\nAutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]\nAutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]\nAutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]\nAutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]\nAutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]\nAutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]\nAutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]\nAutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]\nTracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]\nAutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]\nAutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]\nAutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]\nAutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]\nFuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]\nBatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]\nFuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]\nBatched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]\nVmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]\nFuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]\nPythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]\nFuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]\nPreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]\nPythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "201470f9",
+      "metadata": {
+        "id": "201470f9"
+      },
+      "source": [
+        "## Exercise 3: working with pre-trained models.\n",
+        "\n",
+        "PyTorch offers several pre-trained models https://pytorch.org/vision/0.8/models.html        \n",
+        "We will use ResNet50 trained on ImageNet dataset (https://www.image-net.org/index.php). Use the following code with the files `imagenet-simple-labels.json` that contains the imagenet labels and the image dog.png that we will use as test.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "b4d13080",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 545
+        },
+        "id": "b4d13080",
+        "outputId": "b7ff19f5-b9f3-45af-e43a-1500703d8fcf"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n",
+            "  warnings.warn(msg)\n",
+            "Downloading: \"https://download.pytorch.org/models/resnet50-0676ba61.pth\" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth\n",
+            "100%|██████████| 97.8M/97.8M [00:00<00:00, 188MB/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: Golden Retriever\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "import json\n",
+        "import matplotlib.pyplot as plt\n",
+        "from torchvision import models\n",
+        "from PIL import Image\n",
+        "\n",
+        "# Choose an image to pass through the model\n",
+        "test_image = \"dog.png\"\n",
+        "\n",
+        "# Configure matplotlib for pretty inline plots\n",
+        "#%matplotlib inline\n",
+        "#%config InlineBackend.figure_format = 'retina'\n",
+        "\n",
+        "# Prepare the labels\n",
+        "with open(\"imagenet-simple-labels.json\") as f:\n",
+        "    labels = json.load(f)\n",
+        "\n",
+        "# First prepare the transformations: resize the image to what the model was trained on and convert it to a tensor\n",
+        "data_transform = transforms.Compose(\n",
+        "    [\n",
+        "        transforms.Resize((224, 224)),\n",
+        "        transforms.ToTensor(),\n",
+        "        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+        "    ]\n",
+        ")\n",
+        "# Load the image\n",
+        "\n",
+        "image = Image.open(test_image)\n",
+        "plt.imshow(image), plt.xticks([]), plt.yticks([])\n",
+        "\n",
+        "# Now apply the transformation, expand the batch dimension, and send the image to the GPU\n",
+        "# image = data_transform(image).unsqueeze(0).cuda()\n",
+        "image = data_transform(image).unsqueeze(0)\n",
+        "\n",
+        "# Download the model if it's not there already. It will take a bit on the first run, after that it's fast\n",
+        "model3 = models.resnet50(pretrained=True)\n",
+        "# Send the model to the GPU\n",
+        "# model.cuda()\n",
+        "# Set layers such as dropout and batchnorm in evaluation mode\n",
+        "model3.eval()\n",
+        "\n",
+        "# Get the 1000-dimensional model output\n",
+        "out = model3(image)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "184cfceb",
+      "metadata": {
+        "id": "184cfceb"
+      },
+      "source": [
+        "Experiments:\n",
+        "\n",
+        "Study the code and the results obtained. Possibly add other images downloaded from the internet.\n",
+        "\n",
+        "What is the size of the model? Quantize it and then check if the model is still able to correctly classify the other images.\n",
+        "\n",
+        "Experiment with other pre-trained CNN models.\n",
+        "\n",
+        "    \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "l6AkxJIEj1BS",
+      "metadata": {
+        "id": "l6AkxJIEj1BS",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 423
+        },
+        "outputId": "7aa1fe62-1965-4cc1-9eee-5ac78555460f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: tabby cat\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "test_image_2 = 'cat.jpg'\n",
+        "# Load the image\n",
+        "image_cat = Image.open(test_image_2)\n",
+        "plt.imshow(image_cat), plt.xticks([]), plt.yticks([])\n",
+        "\n",
+        "image_cat = data_transform(image_cat).unsqueeze(0)\n",
+        "out = model3(image_cat)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "9u8okHN2lSoX",
+      "metadata": {
+        "id": "9u8okHN2lSoX",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 423
+        },
+        "outputId": "a40bab95-7d86-4a19-e155-28eeb66675bc"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: Granny Smith\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "test_image_3 = 'apple.jpg'\n",
+        "# Load the image\n",
+        "image_apple = Image.open(test_image_3)\n",
+        "plt.imshow(image_apple), plt.xticks([]), plt.yticks([])\n",
+        "\n",
+        "image_apple = data_transform(image_apple).unsqueeze(0)\n",
+        "out = model3(image_apple)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "FrctIZuFmB8y",
+      "metadata": {
+        "id": "FrctIZuFmB8y"
+      },
+      "source": [
+        "The model recognizes the cat (with description of its coat pattern) and the apple (with its species). We will now quantize the two images and check wether the model can predict the image class again or not."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "fRTBC2nySCRx",
+      "metadata": {
+        "id": "fRTBC2nySCRx",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "4a50ce7e-e628-4ae3-d709-4a557c21a283"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "model:  fp32  \t Size (KB): 102523.238\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "102523238"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 45
+        }
+      ],
+      "source": [
+        "print_size_of_model(model3, \"fp32\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "-IcU6LXlSOHr",
+      "metadata": {
+        "id": "-IcU6LXlSOHr",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "d1aab747-649a-4e5e-bae5-0cef2833b37e"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "model:  int8  \t Size (KB): 96379.996\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "96379996"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 46
+        }
+      ],
+      "source": [
+        "quantized_model3 = torch.quantization.quantize_dynamic(model3, dtype=torch.qint8)\n",
+        "print_size_of_model(quantized_model3, \"int8\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e-6cS1K3USbn",
+      "metadata": {
+        "id": "e-6cS1K3USbn",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "97420fce-b990-49ff-dca6-839b3f896216"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: Golden Retriever\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Get the 1000-dimensional model output\n",
+        "out = quantized_model3(image)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "0p2dJTkdpBaG",
+      "metadata": {
+        "id": "0p2dJTkdpBaG",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "4bd0bc4a-8c6b-495b-b67c-2cf14480454b"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: tabby cat\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Get the 1000-dimensional model output\n",
+        "out = quantized_model3(image_cat)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Get the 1000-dimensional model output\n",
+        "out = quantized_model3(image_apple)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class is: {}\".format(labels[out.argmax()]))"
+      ],
+      "metadata": {
+        "id": "jc2Az0yiMO2e",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "1fa014f6-55de-4146-d20b-f3510c962e12"
+      },
+      "id": "jc2Az0yiMO2e",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class is: Granny Smith\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The quantized model still allowes to properly classify the 3 images (dog, cat, apple)."
+      ],
+      "metadata": {
+        "id": "Y6wH12UFNfBz"
+      },
+      "id": "Y6wH12UFNfBz"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Using MobileNet50 pre trained model"
+      ],
+      "metadata": {
+        "id": "LKEjBsRyRRQP"
+      },
+      "id": "LKEjBsRyRRQP"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We will try an other popular pre trained model for image classification called MobileNetV2. MobileNetV2 is a lightweight deep learning model designed specifically for mobile and embedded applications, using depthwise separable convolutions."
+      ],
+      "metadata": {
+        "id": "K6ENdJO3OE3X"
+      },
+      "id": "K6ENdJO3OE3X"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import json\n",
+        "import matplotlib.pyplot as plt\n",
+        "from torchvision import models\n",
+        "from PIL import Image\n",
+        "\n",
+        "# Choose an image to pass through the model\n",
+        "test_image = \"dog.png\"\n",
+        "\n",
+        "# Configure matplotlib for pretty inline plots\n",
+        "#%matplotlib inline\n",
+        "#%config InlineBackend.figure_format = 'retina'\n",
+        "\n",
+        "# Prepare the labels\n",
+        "with open(\"imagenet-simple-labels.json\") as f:\n",
+        "    labels = json.load(f)\n",
+        "\n",
+        "# First prepare the transformations: resize the image to what the model was trained on and convert it to a tensor\n",
+        "data_transform = transforms.Compose(\n",
+        "    [\n",
+        "        transforms.Resize((224, 224)),\n",
+        "        transforms.ToTensor(),\n",
+        "        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+        "    ]\n",
+        ")\n",
+        "# Load the image\n",
+        "\n",
+        "image = Image.open(test_image)\n",
+        "image = data_transform(image).unsqueeze(0)\n",
+        "\n",
+        "# Load the pre-trained MobileNetV2 model\n",
+        "mobilenet_model = models.mobilenet_v2(pretrained=True)\n",
+        "mobilenet_model.eval()  # Set the model to evaluation mode\n",
+        "\n",
+        "\n",
+        "mobilenet_model.eval()\n",
+        "\n",
+        "# Get the 1000-dimensional model output\n",
+        "out = mobilenet_model(image)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class with MobileNet50 is: {}\".format(labels[out.argmax()]))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Y5hc5nkONkYP",
+        "outputId": "02dcd741-8dda-46e6-f137-74c3413cfe35"
+      },
+      "id": "Y5hc5nkONkYP",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class with MobileNet50 is: Golden Retriever\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.\n",
+            "  warnings.warn(msg)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_image_2 = 'cat.jpg'\n",
+        "# Load the image\n",
+        "image_cat = Image.open(test_image_2)\n",
+        "image_cat = data_transform(image_cat).unsqueeze(0)\n",
+        "out = mobilenet_model(image_cat)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class with MobileNet50 is: {}\".format(labels[out.argmax()]))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "IZBfJQMSQF1i",
+        "outputId": "04df923a-aac3-46b3-cc09-7a84392933b3"
+      },
+      "id": "IZBfJQMSQF1i",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class with MobileNet50 is: Egyptian Mau\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_image_3 = 'apple.jpg'\n",
+        "# Load the image\n",
+        "image_apple = Image.open(test_image_3)\n",
+        "image_apple = data_transform(image_apple).unsqueeze(0)\n",
+        "out = model3(image_apple)\n",
+        "# Find the predicted class\n",
+        "print(\"Predicted class with MobileNet50 is: {}\".format(labels[out.argmax()]))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "NYn_FMZ3QQc2",
+        "outputId": "8ace8f48-3d51-40b9-e107-e84f7a8de15d"
+      },
+      "id": "NYn_FMZ3QQc2",
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Predicted class with MobileNet50 is: Granny Smith\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We see that the prediction for the cat is more relevant with this pre-trained model (the previous ResNet50 was giving the coat pattern 'tabby cat' but this model is more precise in its answer and gives the species 'Egyptian Mau'). The dog and apple are properly classified, like with ResNet50 before."
+      ],
+      "metadata": {
+        "id": "XrFOmZt8Qh4A"
+      },
+      "id": "XrFOmZt8Qh4A"
+    },
+    {
+      "cell_type": "markdown",
+      "id": "5d57da4b",
+      "metadata": {
+        "id": "5d57da4b"
+      },
+      "source": [
+        "## Exercise 4: Transfer Learning\n",
+        "    \n",
+        "    \n",
+        "For this work, we will use a pre-trained model (ResNet18) as a descriptor extractor and will refine the classification by training only the last fully connected layer of the network. Thus, the output layer of the pre-trained network will be replaced by a layer adapted to the new classes to be recognized which will be in our case ants and bees.\n",
+        "Download and unzip in your working directory the dataset available at the address :\n",
+        "    \n",
+        "https://download.pytorch.org/tutorial/hymenoptera_data.zip\n",
+        "    \n",
+        "Execute the following code in order to display some images of the dataset."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "id": "2Dzaxe-EFTx9",
+      "metadata": {
+        "id": "2Dzaxe-EFTx9",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5c990e86-d34a-45c2-985c-ddbf4c5158ff"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ],
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "id": "be2d31f5",
+      "metadata": {
+        "id": "be2d31f5",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 207
+        },
+        "outputId": "8418f722-3bd4-4f85-a08c-3e71d1d16402"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<Figure size 640x480 with 1 Axes>"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {}
+        }
+      ],
+      "source": [
+        "import os\n",
+        "\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "import torchvision\n",
+        "from torchvision import datasets, transforms\n",
+        "\n",
+        "# Data augmentation and normalization for training\n",
+        "# Just normalization for validation\n",
+        "data_transforms = {\n",
+        "    \"train\": transforms.Compose(\n",
+        "        [\n",
+        "            transforms.RandomResizedCrop(\n",
+        "                224\n",
+        "            ),  # ImageNet models were trained on 224x224 images\n",
+        "            transforms.RandomHorizontalFlip(),  # flip horizontally 50% of the time - increases train set variability\n",
+        "            transforms.ToTensor(),  # convert it to a PyTorch tensor\n",
+        "            transforms.Normalize(\n",
+        "                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]\n",
+        "            ),  # ImageNet models expect this norm\n",
+        "        ]\n",
+        "    ),\n",
+        "    \"val\": transforms.Compose(\n",
+        "        [\n",
+        "            transforms.Resize(256),\n",
+        "            transforms.CenterCrop(224),\n",
+        "            transforms.ToTensor(),\n",
+        "            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+        "        ]\n",
+        "    ),\n",
+        "}\n",
+        "\n",
+        "data_dir = \"/content/drive/MyDrive/hymenoptera_data\"\n",
+        "# Create train and validation datasets and loaders\n",
+        "image_datasets = {\n",
+        "    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n",
+        "    for x in [\"train\", \"val\"]\n",
+        "}\n",
+        "dataloaders = {\n",
+        "    x: torch.utils.data.DataLoader(\n",
+        "        image_datasets[x], batch_size=4, shuffle=True, num_workers=0\n",
+        "    )\n",
+        "    for x in [\"train\", \"val\"]\n",
+        "}\n",
+        "dataset_sizes = {x: len(image_datasets[x]) for x in [\"train\", \"val\"]}\n",
+        "class_names = image_datasets[\"train\"].classes\n",
+        "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
+        "\n",
+        "# Helper function for displaying images\n",
+        "def imshow(inp, title=None):\n",
+        "    \"\"\"Imshow for Tensor.\"\"\"\n",
+        "    inp = inp.numpy().transpose((1, 2, 0))\n",
+        "    mean = np.array([0.485, 0.456, 0.406])\n",
+        "    std = np.array([0.229, 0.224, 0.225])\n",
+        "\n",
+        "    # Un-normalize the images\n",
+        "    inp = std * inp + mean\n",
+        "    # Clip just in case\n",
+        "    inp = np.clip(inp, 0, 1)\n",
+        "    plt.imshow(inp)\n",
+        "    if title is not None:\n",
+        "        plt.title(title)\n",
+        "    plt.pause(0.001)  # pause a bit so that plots are updated\n",
+        "    plt.show()\n",
+        "\n",
+        "\n",
+        "# Get a batch of training data\n",
+        "inputs, classes = next(iter(dataloaders[\"train\"]))\n",
+        "\n",
+        "# Make a grid from batch\n",
+        "out = torchvision.utils.make_grid(inputs)\n",
+        "\n",
+        "imshow(out, title=[class_names[x] for x in classes])\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "bbd48800",
+      "metadata": {
+        "id": "bbd48800"
+      },
+      "source": [
+        "Now, execute the following code which uses a pre-trained model ResNet18 having replaced the output layer for the ants/bees classification and performs the model training by only changing the weights of this output layer."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "id": "572d824c",
+      "metadata": {
+        "id": "572d824c",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "7ab4cd64-f395-4f25-f574-eef36b1d68ce"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:617: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
+            "  warnings.warn(\n",
+            "/usr/local/lib/python3.10/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.\n",
+            "  warnings.warn(msg)\n",
+            "Downloading: \"https://download.pytorch.org/models/resnet18-f37072fd.pth\" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth\n",
+            "100%|██████████| 44.7M/44.7M [00:00<00:00, 220MB/s]\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/10\n",
+            "----------\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torch/optim/lr_scheduler.py:224: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "train Loss: 0.7472 Acc: 0.5738\n",
+            "val Loss: 0.3010 Acc: 0.8562\n",
+            "\n",
+            "Epoch 2/10\n",
+            "----------\n",
+            "train Loss: 0.4474 Acc: 0.7910\n",
+            "val Loss: 0.1926 Acc: 0.9542\n",
+            "\n",
+            "Epoch 3/10\n",
+            "----------\n",
+            "train Loss: 0.4452 Acc: 0.8033\n",
+            "val Loss: 0.2513 Acc: 0.9020\n",
+            "\n",
+            "Epoch 4/10\n",
+            "----------\n",
+            "train Loss: 0.3650 Acc: 0.8279\n",
+            "val Loss: 0.1967 Acc: 0.9542\n",
+            "\n",
+            "Epoch 5/10\n",
+            "----------\n",
+            "train Loss: 0.3299 Acc: 0.8607\n",
+            "val Loss: 0.1995 Acc: 0.9412\n",
+            "\n",
+            "Epoch 6/10\n",
+            "----------\n",
+            "train Loss: 0.4849 Acc: 0.7910\n",
+            "val Loss: 0.2128 Acc: 0.9346\n",
+            "\n",
+            "Epoch 7/10\n",
+            "----------\n",
+            "train Loss: 0.3027 Acc: 0.8689\n",
+            "val Loss: 0.2118 Acc: 0.9281\n",
+            "\n",
+            "Epoch 8/10\n",
+            "----------\n",
+            "train Loss: 0.3632 Acc: 0.8361\n",
+            "val Loss: 0.2298 Acc: 0.9281\n",
+            "\n",
+            "Epoch 9/10\n",
+            "----------\n",
+            "train Loss: 0.4297 Acc: 0.7992\n",
+            "val Loss: 0.2540 Acc: 0.9150\n",
+            "\n",
+            "Epoch 10/10\n",
+            "----------\n",
+            "train Loss: 0.3748 Acc: 0.8607\n",
+            "val Loss: 0.2135 Acc: 0.9346\n",
+            "\n",
+            "Training complete in 2m 14s\n",
+            "Best val Acc: 0.954248\n"
+          ]
+        }
+      ],
+      "source": [
+        "import copy\n",
+        "import os\n",
+        "import time\n",
+        "\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torch.optim as optim\n",
+        "import torchvision\n",
+        "from torch.optim import lr_scheduler\n",
+        "from torchvision import datasets, transforms\n",
+        "\n",
+        "# Data augmentation and normalization for training\n",
+        "# Just normalization for validation\n",
+        "data_transforms = {\n",
+        "    \"train\": transforms.Compose(\n",
+        "        [\n",
+        "            transforms.RandomResizedCrop(\n",
+        "                224\n",
+        "            ),  # ImageNet models were trained on 224x224 images\n",
+        "            transforms.RandomHorizontalFlip(),  # flip horizontally 50% of the time - increases train set variability\n",
+        "            transforms.ToTensor(),  # convert it to a PyTorch tensor\n",
+        "            transforms.Normalize(\n",
+        "                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]\n",
+        "            ),  # ImageNet models expect this norm\n",
+        "        ]\n",
+        "    ),\n",
+        "    \"val\": transforms.Compose(\n",
+        "        [\n",
+        "            transforms.Resize(256),\n",
+        "            transforms.CenterCrop(224),\n",
+        "            transforms.ToTensor(),\n",
+        "            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+        "        ]\n",
+        "    ),\n",
+        "}\n",
+        "\n",
+        "data_dir = \"/content/drive/MyDrive/hymenoptera_data\"\n",
+        "# Create train and validation datasets and loaders\n",
+        "image_datasets = {\n",
+        "    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n",
+        "    for x in [\"train\", \"val\"]\n",
+        "}\n",
+        "dataloaders = {\n",
+        "    x: torch.utils.data.DataLoader(\n",
+        "        image_datasets[x], batch_size=4, shuffle=True, num_workers=4\n",
+        "    )\n",
+        "    for x in [\"train\", \"val\"]\n",
+        "}\n",
+        "dataset_sizes = {x: len(image_datasets[x]) for x in [\"train\", \"val\"]}\n",
+        "class_names = image_datasets[\"train\"].classes\n",
+        "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
+        "\n",
+        "# Helper function for displaying images\n",
+        "def imshow(inp, title=None):\n",
+        "    \"\"\"Imshow for Tensor.\"\"\"\n",
+        "    inp = inp.numpy().transpose((1, 2, 0))\n",
+        "    mean = np.array([0.485, 0.456, 0.406])\n",
+        "    std = np.array([0.229, 0.224, 0.225])\n",
+        "\n",
+        "    # Un-normalize the images\n",
+        "    inp = std * inp + mean\n",
+        "    # Clip just in case\n",
+        "    inp = np.clip(inp, 0, 1)\n",
+        "    plt.imshow(inp)\n",
+        "    if title is not None:\n",
+        "        plt.title(title)\n",
+        "    plt.pause(0.001)  # pause a bit so that plots are updated\n",
+        "    plt.show()\n",
+        "\n",
+        "\n",
+        "# Get a batch of training data\n",
+        "# inputs, classes = next(iter(dataloaders['train']))\n",
+        "\n",
+        "# Make a grid from batch\n",
+        "# out = torchvision.utils.make_grid(inputs)\n",
+        "\n",
+        "# imshow(out, title=[class_names[x] for x in classes])\n",
+        "# training\n",
+        "\n",
+        "\n",
+        "def train_model(model, criterion, optimizer, scheduler, num_epochs=25):\n",
+        "    since = time.time()\n",
+        "\n",
+        "    best_model_wts = copy.deepcopy(model.state_dict())\n",
+        "    best_acc = 0.0\n",
+        "\n",
+        "    epoch_time = []  # we'll keep track of the time needed for each epoch\n",
+        "\n",
+        "    for epoch in range(num_epochs):\n",
+        "        epoch_start = time.time()\n",
+        "        print(\"Epoch {}/{}\".format(epoch + 1, num_epochs))\n",
+        "        print(\"-\" * 10)\n",
+        "\n",
+        "        # Each epoch has a training and validation phase\n",
+        "        for phase in [\"train\", \"val\"]:\n",
+        "            if phase == \"train\":\n",
+        "                scheduler.step()\n",
+        "                model.train()  # Set model to training mode\n",
+        "            else:\n",
+        "                model.eval()  # Set model to evaluate mode\n",
+        "\n",
+        "            running_loss = 0.0\n",
+        "            running_corrects = 0\n",
+        "\n",
+        "            # Iterate over data.\n",
+        "            for inputs, labels in dataloaders[phase]:\n",
+        "                inputs = inputs.to(device)\n",
+        "                labels = labels.to(device)\n",
+        "\n",
+        "                # zero the parameter gradients\n",
+        "                optimizer.zero_grad()\n",
+        "\n",
+        "                # Forward\n",
+        "                # Track history if only in training phase\n",
+        "                with torch.set_grad_enabled(phase == \"train\"):\n",
+        "                    outputs = model(inputs)\n",
+        "                    _, preds = torch.max(outputs, 1)\n",
+        "                    loss = criterion(outputs, labels)\n",
+        "\n",
+        "                    # backward + optimize only if in training phase\n",
+        "                    if phase == \"train\":\n",
+        "                        loss.backward()\n",
+        "                        optimizer.step()\n",
+        "\n",
+        "                # Statistics\n",
+        "                running_loss += loss.item() * inputs.size(0)\n",
+        "                running_corrects += torch.sum(preds == labels.data)\n",
+        "\n",
+        "            epoch_loss = running_loss / dataset_sizes[phase]\n",
+        "            epoch_acc = running_corrects.double() / dataset_sizes[phase]\n",
+        "\n",
+        "            print(\"{} Loss: {:.4f} Acc: {:.4f}\".format(phase, epoch_loss, epoch_acc))\n",
+        "\n",
+        "            # Deep copy the model\n",
+        "            if phase == \"val\" and epoch_acc > best_acc:\n",
+        "                best_acc = epoch_acc\n",
+        "                best_model_wts = copy.deepcopy(model.state_dict())\n",
+        "\n",
+        "        # Add the epoch time\n",
+        "        t_epoch = time.time() - epoch_start\n",
+        "        epoch_time.append(t_epoch)\n",
+        "        print()\n",
+        "\n",
+        "    time_elapsed = time.time() - since\n",
+        "    print(\n",
+        "        \"Training complete in {:.0f}m {:.0f}s\".format(\n",
+        "            time_elapsed // 60, time_elapsed % 60\n",
+        "        )\n",
+        "    )\n",
+        "    print(\"Best val Acc: {:4f}\".format(best_acc))\n",
+        "\n",
+        "    # Load best model weights\n",
+        "    model.load_state_dict(best_model_wts)\n",
+        "    return model, epoch_time\n",
+        "\n",
+        "\n",
+        "# Download a pre-trained ResNet18 model and freeze its weights\n",
+        "model = torchvision.models.resnet18(pretrained=True)\n",
+        "for param in model.parameters():\n",
+        "    param.requires_grad = False\n",
+        "\n",
+        "# Replace the final fully connected layer\n",
+        "# Parameters of newly constructed modules have requires_grad=True by default\n",
+        "num_ftrs = model.fc.in_features\n",
+        "model.fc = nn.Linear(num_ftrs, 2)\n",
+        "# Send the model to the GPU\n",
+        "model = model.to(device)\n",
+        "# Set the loss function\n",
+        "criterion = nn.CrossEntropyLoss()\n",
+        "\n",
+        "# Observe that only the parameters of the final layer are being optimized\n",
+        "optimizer_conv = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)\n",
+        "exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)\n",
+        "model, epoch_time = train_model(\n",
+        "    model, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=10\n",
+        ")\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "sN7V8_EAn0OO",
+      "metadata": {
+        "id": "sN7V8_EAn0OO"
+      },
+      "source": [
+        "Experiments:\n",
+        "Study the code and the results obtained.\n",
+        "\n",
+        "Modify the code and add an \"eval_model\" function to allow\n",
+        "the evaluation of the model on a test set (different from the learning and validation sets used during the learning phase). Study the results obtained.\n",
+        "\n",
+        "Now modify the code to replace the current classification layer with a set of two layers using a \"relu\" activation function for the middle layer, and the \"dropout\" mechanism for both layers. Renew the experiments and study the results obtained.\n",
+        "\n",
+        "Apply ther quantization (post and quantization aware) and evaluate impact on model size and accuracy."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "We first need to load a new dataset and split it into a train test and validation set.  We dataset found on Kaggle containing a train folder and a validation folder.\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "XTs_3UyqU0EB"
+      },
+      "id": "XTs_3UyqU0EB"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import copy\n",
+        "import os\n",
+        "import time\n",
+        "\n",
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torch.optim as optim\n",
+        "import torchvision\n",
+        "from torch.optim import lr_scheduler\n",
+        "from torchvision import datasets, transforms\n",
+        "\n",
+        "# Data augmentation and normalization for training\n",
+        "# Just normalization for validation\n",
+        "data_transforms = {\n",
+        "    \"ant_bees\": transforms.Compose(\n",
+        "        [\n",
+        "            transforms.RandomResizedCrop(\n",
+        "                224\n",
+        "            ),  # ImageNet models were trained on 224x224 images\n",
+        "            transforms.RandomHorizontalFlip(),  # flip horizontally 50% of the time - increases train set variability\n",
+        "            transforms.ToTensor(),  # convert it to a PyTorch tensor\n",
+        "            transforms.Normalize(\n",
+        "                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]\n",
+        "            ),  # ImageNet models expect this norm\n",
+        "        ]\n",
+        "    ),\n",
+        "}\n",
+        "\n",
+        "data_dir = \"/content/drive/MyDrive\"\n",
+        "# Create train and validation datasets and loaders\n",
+        "image_datasets = {\n",
+        "    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n",
+        "    for x in [\"ant_bees\"]\n",
+        "}\n",
+        "dataloaders = {\n",
+        "    x: torch.utils.data.DataLoader(\n",
+        "        image_datasets[x], batch_size=4, shuffle=True, num_workers=4\n",
+        "    )\n",
+        "    for x in [\"ant_bees\"]\n",
+        "}\n",
+        "dataset_sizes = {x: len(image_datasets[x]) for x in [\"ant_bees\"]}\n",
+        "class_names = image_datasets[\"ant_bees\"].classes\n",
+        "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "tA0TM1zno5cY",
+        "outputId": "320fe225-269e-4d3f-ff11-01d531cca3ad"
+      },
+      "id": "tA0TM1zno5cY",
+      "execution_count": 11,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:617: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+            "  warnings.warn(\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 13,
+      "id": "PnI7tSTqXRZ8",
+      "metadata": {
+        "id": "PnI7tSTqXRZ8",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "bd43177b-dd27-4982-9bc3-4e0555093d99"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Validation Loss: 0.1706 Accuracy: 0.9303\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(0.17056238605472884, tensor(0.9303, device='cuda:0', dtype=torch.float64))"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 13
+        }
+      ],
+      "source": [
+        "# Data augmentation and normalization for training\n",
+        "# Just normalization for validation\n",
+        "\n",
+        "def eval_model(model, dataloader, criterion):\n",
+        "    model.eval()\n",
+        "\n",
+        "    running_loss = 0.0\n",
+        "    running_corrects = 0\n",
+        "\n",
+        "    for inputs, labels in dataloaders[\"ant_bees\"]:\n",
+        "        inputs = inputs.to(device)\n",
+        "        labels = labels.to(device)\n",
+        "\n",
+        "        outputs = model(inputs)\n",
+        "        _, preds = torch.max(outputs, 1)\n",
+        "        loss = criterion(outputs, labels)\n",
+        "\n",
+        "        running_loss += loss.item() * inputs.size(0)\n",
+        "        running_corrects += torch.sum(preds == labels.data)\n",
+        "\n",
+        "    total_loss = running_loss / dataset_sizes[\"ant_bees\"]\n",
+        "    total_accuracy = running_corrects.double() / dataset_sizes[\"ant_bees\"]\n",
+        "\n",
+        "    print(\"Validation Loss: {:.4f} Accuracy: {:.4f}\".format(total_loss, total_accuracy))\n",
+        "    return total_loss, total_accuracy\n",
+        "\n",
+        "eval_model(model, dataloaders, criterion)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "04a263f0",
+      "metadata": {
+        "id": "04a263f0"
+      },
+      "source": [
+        "## Optional\n",
+        "    \n",
+        "Try this at home!!\n",
+        "\n",
+        "\n",
+        "Pytorch offers a framework to export a given CNN to your selfphone (either android or iOS). Have a look at the tutorial https://pytorch.org/mobile/home/\n",
+        "\n",
+        "The Exercise consists in deploying the CNN of Exercise 4 in your phone and then test it on live.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "fe954ce4",
+      "metadata": {
+        "id": "fe954ce4"
+      },
+      "source": [
+        "## Author\n",
+        "\n",
+        "Alberto BOSIO - Ph. D."
+      ]
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.5"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "9e3efbebb05da2d4a1968abe9a0645745f54b63feb7a85a514e4da0495be97eb"
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
\ No newline at end of file