diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb
index 00e4fdc78c068248ca0742c64725d155b3681f0d..0b832d1bf9d9158033a621e289afdf64141c4f52 100644
--- a/TD2 Deep Learning.ipynb	
+++ b/TD2 Deep Learning.ipynb	
@@ -52,10 +52,72 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "b1950f0a",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[ 0.1159, -0.1632,  0.6574,  1.5902, -0.4352, -1.1418,  0.8810,  0.0847,\n",
+      "         -2.4290, -0.0911],\n",
+      "        [ 0.3400, -1.9606, -0.0214, -0.1179, -0.3917, -0.3592,  0.5251,  0.4169,\n",
+      "         -0.8085, -0.2057],\n",
+      "        [-0.7488,  0.7606,  0.1129, -2.6223, -0.5739, -0.4979,  2.0387,  0.1628,\n",
+      "          1.1597, -0.9275],\n",
+      "        [-1.5324,  1.4420,  0.9108,  0.4737,  0.3852, -1.1974,  1.7244,  1.3268,\n",
+      "          1.4552,  0.5241],\n",
+      "        [-0.3818, -0.4960, -1.5574, -0.8755,  1.2589,  0.8939,  0.0385, -2.5047,\n",
+      "          0.6804, -0.1951],\n",
+      "        [ 0.1988,  0.9232, -1.3031,  1.8143,  0.0756,  1.2082, -1.1921,  0.0647,\n",
+      "          0.1529,  0.4644],\n",
+      "        [ 1.8262,  0.6831, -0.1683, -0.8331, -0.5271, -0.2069,  0.5703,  1.7226,\n",
+      "         -0.6655, -0.4297],\n",
+      "        [-0.0630, -0.2216,  2.2132, -0.8788,  2.8345, -0.0534, -1.7918, -0.6061,\n",
+      "         -0.2461,  0.4126],\n",
+      "        [ 0.3832, -0.2473, -1.1898,  2.3250,  0.1655, -0.4416, -0.4937, -0.1714,\n",
+      "          0.6682, -0.7186],\n",
+      "        [-0.5843,  1.7539,  0.4247,  0.5102, -1.2161,  0.2732,  1.8955,  1.5722,\n",
+      "          0.9527,  0.2717],\n",
+      "        [-1.2976,  0.2779, -0.8085,  0.0037, -1.4008, -1.3840,  0.1210,  0.5056,\n",
+      "          0.6006, -1.5492],\n",
+      "        [-0.1415, -0.8489,  0.3045,  2.3843,  1.4306, -0.5467, -0.2279,  0.2920,\n",
+      "          1.5270, -1.5247],\n",
+      "        [-0.8661, -0.3661,  0.3478, -0.5955,  1.0730, -2.1341, -0.8818,  0.2842,\n",
+      "          0.8046,  0.4630],\n",
+      "        [-0.1986,  1.3981, -0.3965, -0.6231,  2.5136,  0.1703, -1.0520, -0.4539,\n",
+      "         -1.8835, -0.1314]])\n",
+      "AlexNet(\n",
+      "  (features): Sequential(\n",
+      "    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n",
+      "    (1): ReLU(inplace=True)\n",
+      "    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
+      "    (4): ReLU(inplace=True)\n",
+      "    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (7): ReLU(inplace=True)\n",
+      "    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (9): ReLU(inplace=True)\n",
+      "    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (11): ReLU(inplace=True)\n",
+      "    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  )\n",
+      "  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n",
+      "  (classifier): Sequential(\n",
+      "    (0): Dropout(p=0.5, inplace=False)\n",
+      "    (1): Linear(in_features=9216, out_features=4096, bias=True)\n",
+      "    (2): ReLU(inplace=True)\n",
+      "    (3): Dropout(p=0.5, inplace=False)\n",
+      "    (4): Linear(in_features=4096, out_features=4096, bias=True)\n",
+      "    (5): ReLU(inplace=True)\n",
+      "    (6): Linear(in_features=4096, out_features=1000, bias=True)\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
    "source": [
     "import torch\n",
     "\n",
@@ -95,10 +157,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "6e18f2fd",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CUDA is not available.  Training on CPU ...\n"
+     ]
+    }
+   ],
    "source": [
     "import torch\n",
     "\n",
@@ -121,10 +191,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "462666a2",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data\\cifar-10-python.tar.gz\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100.0%\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Extracting data\\cifar-10-python.tar.gz to data\n",
+      "Files already downloaded and verified\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "from torchvision import datasets, transforms\n",
@@ -196,7 +289,22 @@
    "execution_count": null,
    "id": "317bf070",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Net(\n",
+      "  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n",
+      "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
+      "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
+      "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
+      "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
+      ")\n"
+     ]
+    }
+   ],
    "source": [
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
@@ -232,6 +340,125 @@
     "    model.cuda()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "02ad19e0",
+   "metadata": {},
+   "source": [
+    "Creating the model to answer question 1: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "9f3145ca",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Net_3Conv_3lin(\n",
+      "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  (fc1): Linear(in_features=1024, out_features=512, bias=True)\n",
+      "  (fc2): Linear(in_features=512, out_features=64, bias=True)\n",
+      "  (fc3): Linear(in_features=64, out_features=10, bias=True)\n",
+      "  (dropout): Dropout(p=0.3, inplace=False)\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "class Net_3Conv_3lin(nn.Module):\n",
+    "    \"\"\"CNN: three 3x3 conv + ReLU + 2x2 max-pool stages, then three linear layers (10 outputs).\"\"\"\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)\n",
+    "        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)\n",
+    "        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)\n",
+    "        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)\n",
+    "        self.fc1 = nn.Linear(in_features=1024, out_features=512)\n",
+    "        self.fc2 = nn.Linear(in_features=512, out_features=64)\n",
+    "        self.fc3 = nn.Linear(in_features=64, out_features=10)\n",
+    "        self.p = 0.3  # dropout probability shared by both hidden linear layers\n",
+    "        self.dropout = nn.Dropout(p=self.p)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the 10 output scores per sample; the flatten assumes 64 * 4 * 4 features.\"\"\"\n",
+    "        for conv_layer in (self.conv1, self.conv2, self.conv3):\n",
+    "            x = self.pool(F.relu(conv_layer(x)))\n",
+    "        x = x.view(-1, 64 * 4 * 4)\n",
+    "        for hidden_layer in (self.fc1, self.fc2):\n",
+    "            x = self.dropout(F.relu(hidden_layer(x)))\n",
+    "        return self.fc3(x)\n",
+    "\n",
+    "\n",
+    "# Instantiate the CNN defined above and print its layer summary.\n",
+    "model = Net_3Conv_3lin()\n",
+    "print(model)\n",
+    "# Move the model to the GPU when train_on_gpu is truthy (the flag is set outside this cell).\n",
+    "if train_on_gpu:\n",
+    "    model.cuda()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Creating a model for quantization-aware training"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71bb37b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "class Net_3Conv_3lin_Quant(nn.Module):\n",
+    "    \"\"\"Same 3-conv / 3-linear CNN, wrapped between a QuantStub and a DeQuantStub.\"\"\"\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        # The two stubs mark where tensors enter and leave the quantized part of the network.\n",
+    "        self.quant = torch.ao.quantization.QuantStub()\n",
+    "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)\n",
+    "        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)\n",
+    "        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)\n",
+    "        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)\n",
+    "        self.fc1 = nn.Linear(in_features=1024, out_features=512)\n",
+    "        self.fc2 = nn.Linear(in_features=512, out_features=64)\n",
+    "        self.fc3 = nn.Linear(in_features=64, out_features=10)\n",
+    "        self.p = 0.3  # dropout probability shared by both hidden linear layers\n",
+    "        self.dropout = nn.Dropout(p=self.p)\n",
+    "        self.dequant = torch.ao.quantization.DeQuantStub()\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the 10 output scores per sample; the flatten assumes 64 * 4 * 4 features.\"\"\"\n",
+    "        x = self.quant(x)\n",
+    "        for conv_layer in (self.conv1, self.conv2, self.conv3):\n",
+    "            x = self.pool(F.relu(conv_layer(x)))\n",
+    "        x = x.view(-1, 64 * 4 * 4)\n",
+    "        for hidden_layer in (self.fc1, self.fc2):\n",
+    "            x = self.dropout(F.relu(hidden_layer(x)))\n",
+    "        x = self.fc3(x)\n",
+    "        return self.dequant(x)\n",
+    "\n",
+    "\n",
+    "# Instantiate the stub-wrapped CNN. NOTE(review): this rebinds the global `model` that other cells use - confirm this is intended.\n",
+    "model = Net_3Conv_3lin_Quant()\n",
+    "print(model)\n",
+    "# Move the model to the GPU when train_on_gpu is truthy (the flag is set outside this cell).\n",
+    "if train_on_gpu:\n",
+    "    model.cuda()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a2dc4974",
@@ -242,10 +469,78 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "id": "4b53f229",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 0 \tTraining Loss: 45.872737 \tValidation Loss: 44.489702\n",
+      "Validation loss decreased (inf --> 44.489702).  Saving model ...\n",
+      "Epoch: 1 \tTraining Loss: 40.061390 \tValidation Loss: 36.100894\n",
+      "Validation loss decreased (44.489702 --> 36.100894).  Saving model ...\n",
+      "Epoch: 2 \tTraining Loss: 34.138645 \tValidation Loss: 31.113222\n",
+      "Validation loss decreased (36.100894 --> 31.113222).  Saving model ...\n",
+      "Epoch: 3 \tTraining Loss: 30.507358 \tValidation Loss: 28.677294\n",
+      "Validation loss decreased (31.113222 --> 28.677294).  Saving model ...\n",
+      "Epoch: 4 \tTraining Loss: 28.486533 \tValidation Loss: 26.945912\n",
+      "Validation loss decreased (28.677294 --> 26.945912).  Saving model ...\n",
+      "Epoch: 5 \tTraining Loss: 26.688108 \tValidation Loss: 25.087968\n",
+      "Validation loss decreased (26.945912 --> 25.087968).  Saving model ...\n",
+      "Epoch: 6 \tTraining Loss: 24.921677 \tValidation Loss: 23.867951\n",
+      "Validation loss decreased (25.087968 --> 23.867951).  Saving model ...\n",
+      "Epoch: 7 \tTraining Loss: 23.352516 \tValidation Loss: 21.780911\n",
+      "Validation loss decreased (23.867951 --> 21.780911).  Saving model ...\n",
+      "Epoch: 8 \tTraining Loss: 21.794870 \tValidation Loss: 21.096160\n",
+      "Validation loss decreased (21.780911 --> 21.096160).  Saving model ...\n",
+      "Epoch: 9 \tTraining Loss: 20.547996 \tValidation Loss: 19.826372\n",
+      "Validation loss decreased (21.096160 --> 19.826372).  Saving model ...\n",
+      "Epoch: 10 \tTraining Loss: 19.401682 \tValidation Loss: 19.596204\n",
+      "Validation loss decreased (19.826372 --> 19.596204).  Saving model ...\n",
+      "Epoch: 11 \tTraining Loss: 18.340276 \tValidation Loss: 18.632437\n",
+      "Validation loss decreased (19.596204 --> 18.632437).  Saving model ...\n",
+      "Epoch: 12 \tTraining Loss: 17.266555 \tValidation Loss: 17.758480\n",
+      "Validation loss decreased (18.632437 --> 17.758480).  Saving model ...\n",
+      "Epoch: 13 \tTraining Loss: 16.353216 \tValidation Loss: 17.932480\n",
+      "Epoch: 14 \tTraining Loss: 15.507940 \tValidation Loss: 16.795444\n",
+      "Validation loss decreased (17.758480 --> 16.795444).  Saving model ...\n",
+      "Epoch: 15 \tTraining Loss: 14.657860 \tValidation Loss: 16.382975\n",
+      "Validation loss decreased (16.795444 --> 16.382975).  Saving model ...\n",
+      "Epoch: 16 \tTraining Loss: 13.861092 \tValidation Loss: 16.670121\n",
+      "Epoch: 17 \tTraining Loss: 12.984836 \tValidation Loss: 16.962824\n",
+      "Epoch: 18 \tTraining Loss: 12.257837 \tValidation Loss: 16.400703\n",
+      "Epoch: 19 \tTraining Loss: 11.436899 \tValidation Loss: 16.729391\n",
+      "Epoch: 20 \tTraining Loss: 10.915463 \tValidation Loss: 16.299635\n",
+      "Validation loss decreased (16.382975 --> 16.299635).  Saving model ...\n",
+      "Epoch: 21 \tTraining Loss: 10.233074 \tValidation Loss: 16.345074\n",
+      "Epoch: 22 \tTraining Loss: 9.549847 \tValidation Loss: 16.705205\n",
+      "Epoch: 23 \tTraining Loss: 8.865565 \tValidation Loss: 16.583533\n",
+      "Epoch: 24 \tTraining Loss: 8.210216 \tValidation Loss: 17.112398\n",
+      "Epoch: 25 \tTraining Loss: 7.781697 \tValidation Loss: 17.192360\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[34], line 41\u001b[0m\n\u001b[0;32m     39\u001b[0m     data, target \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mcuda(), target\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[0;32m     40\u001b[0m \u001b[38;5;66;03m# Forward pass: compute predicted outputs by passing inputs to the model\u001b[39;00m\n\u001b[1;32m---> 41\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     42\u001b[0m \u001b[38;5;66;03m# Calculate the batch loss\u001b[39;00m\n\u001b[0;32m     43\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(output, target)\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+      "Cell \u001b[1;32mIn[33], line 16\u001b[0m, in \u001b[0;36mNet_3Conv_3lin.forward\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m     15\u001b[0m     x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool(F\u001b[38;5;241m.\u001b[39mrelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv1(x)))\n\u001b[1;32m---> 16\u001b[0m     x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpool\u001b[49m\u001b[43m(\u001b[49m\u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrelu\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     17\u001b[0m     x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool(F\u001b[38;5;241m.\u001b[39mrelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv3(x)))\n\u001b[0;32m     18\u001b[0m     x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m64\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m4\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m4\u001b[39m)\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\pooling.py:213\u001b[0m, in \u001b[0;36mMaxPool2d.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m    212\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor):\n\u001b[1;32m--> 213\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_pool2d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    214\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m    215\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    216\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    217\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    218\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    219\u001b[0m \u001b[43m        \u001b[49m\u001b[43mceil_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mceil_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    220\u001b[0m \u001b[43m        \u001b[49m\u001b[43mreturn_indices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_indices\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    221\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\_jit_internal.py:624\u001b[0m, in \u001b[0;36mboolean_dispatch.<locals>.fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    622\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m if_true(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    623\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 624\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mif_false\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\functional.py:830\u001b[0m, in \u001b[0;36m_max_pool2d\u001b[1;34m(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)\u001b[0m\n\u001b[0;32m    828\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stride \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    829\u001b[0m     stride \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mannotate(List[\u001b[38;5;28mint\u001b[39m], [])\n\u001b[1;32m--> 830\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_pool2d\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkernel_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mceil_mode\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
    "source": [
     "import torch.optim as optim\n",
     "\n",
@@ -254,7 +549,7 @@
     "\n",
     "n_epochs = 30  # number of epochs to train the model\n",
     "train_loss_list = []  # list to store loss to visualize\n",
-    "valid_loss_min = np.Inf  # track change in validation loss\n",
+    "valid_loss_min = np.inf  # track change in validation loss\n",
     "\n",
     "for epoch in range(n_epochs):\n",
     "    # Keep track of training and validation loss\n",
@@ -326,17 +621,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "id": "d39df818",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Net_3Conv_3lin(\n",
+      "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  (fc1): Linear(in_features=1024, out_features=512, bias=True)\n",
+      "  (fc2): Linear(in_features=512, out_features=64, bias=True)\n",
+      "  (fc3): Linear(in_features=64, out_features=10, bias=True)\n",
+      "  (dropout): Dropout(p=0.3, inplace=False)\n",
+      ")\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "import matplotlib.pyplot as plt\n",
-    "\n",
-    "plt.plot(range(n_epochs), train_loss_list)\n",
+    "print(model)\n",
+    "plt.plot(range(len(train_loss_list)), train_loss_list)\n",
     "plt.xlabel(\"Epoch\")\n",
     "plt.ylabel(\"Loss\")\n",
-    "plt.title(\"Performance of Model 1\")\n",
+    "plt.title(\"Performance of Model 2\")\n",
     "plt.show()"
    ]
   },
@@ -350,10 +672,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "id": "e93efdfc",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\xxpod\\AppData\\Local\\Temp\\ipykernel_18828\\3291884398.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test Loss: 16.123924\n",
+      "\n",
+      "Test Accuracy of airplane: 81% (810/1000)\n",
+      "Test Accuracy of automobile: 85% (855/1000)\n",
+      "Test Accuracy of  bird: 63% (633/1000)\n",
+      "Test Accuracy of   cat: 52% (525/1000)\n",
+      "Test Accuracy of  deer: 69% (695/1000)\n",
+      "Test Accuracy of   dog: 71% (717/1000)\n",
+      "Test Accuracy of  frog: 77% (772/1000)\n",
+      "Test Accuracy of horse: 77% (772/1000)\n",
+      "Test Accuracy of  ship: 84% (843/1000)\n",
+      "Test Accuracy of truck: 76% (765/1000)\n",
+      "\n",
+      "Test Accuracy (Overall): 73% (7387/10000)\n"
+     ]
+    }
+   ],
    "source": [
     "model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n",
     "\n",
@@ -431,7 +782,63 @@
     "- The first fully connected layer will have an output size of 512.\n",
     "- The second fully connected layer will have an output size of 64.\n",
     "\n",
-    "Compare the results obtained with this new network to those obtained previously."
+    "Compare the results obtained with this new network to those obtained previously.\n",
+    "\n",
+    "ANSWER: The model is built above and named Net_3Conv_3lin\n",
+    "\n",
+    "\n",
+    "Results for the previous model : \n",
+    "\n",
+    "We observe overfitting from about the 10th epoch: the validation loss plateaued at about 22 while the training loss kept decreasing to 10, as can be seen from the training logs:\n",
+    "\n",
+    "Epoch: 7 \tTraining Loss: 23.183946 \tValidation Loss: 24.331222\n",
+    "Validation loss decreased (25.691083 --> 24.331222).  Saving model ...\n",
+    "\n",
+    "Epoch: 8 \tTraining Loss: 22.215979 \tValidation Loss: 23.632853\n",
+    "Validation loss decreased (24.331222 --> 23.632853).  Saving model ...\n",
+    "\n",
+    "Epoch: 9 \tTraining Loss: 21.408623 \tValidation Loss: 23.475442\n",
+    "Validation loss decreased (23.632853 --> 23.475442).  Saving model ...\n",
+    "\n",
+    "Epoch: 10 \tTraining Loss: 20.637072 \tValidation Loss: 23.639358\n",
+    "\n",
+    "Epoch: 11 \tTraining Loss: 19.877338 \tValidation Loss: 22.408472\n",
+    "Validation loss decreased (23.475442 --> 22.408472).  Saving model ...\n",
+    "\n",
+    "Epoch: 12 \tTraining Loss: 19.188079 \tValidation Loss: 23.296445\n",
+    "\n",
+    "Epoch: 13 \tTraining Loss: 18.647543 \tValidation Loss: 22.897815\n",
+    "\n",
+    "Epoch: 14 \tTraining Loss: 17.989626 \tValidation Loss: 22.755968\n",
+    "\n",
+    "The performance is as follows: ![alt text](perf_model_1.png)\n",
+    "\n",
+    "and the final accuracies were:\n",
+    "![Accuracies](final_accuracy_first_model.png)\n",
+    "\n",
+    "\n",
+    "SECOND MODEL:\n",
+    "\n",
+    "For the second model, the validation loss goes lower, though in addition to the architectural changes there are also simply more weights, and the model takes longer to train.\n",
+    "\n",
+    "We achieve a validation loss of 16, and the model is still improving after a larger number of epochs (20 vs 10).\n",
+    "\n",
+    "\n",
+    "Here are the final accuracies:\n",
+    "Test Loss: 16.123924\n",
+    "\n",
+    "Test Accuracy of airplane: 81% (810/1000)\n",
+    "Test Accuracy of automobile: 85% (855/1000)\n",
+    "Test Accuracy of  bird: 63% (633/1000)\n",
+    "Test Accuracy of   cat: 52% (525/1000)\n",
+    "Test Accuracy of  deer: 69% (695/1000)\n",
+    "Test Accuracy of   dog: 71% (717/1000)\n",
+    "Test Accuracy of  frog: 77% (772/1000)\n",
+    "Test Accuracy of horse: 77% (772/1000)\n",
+    "Test Accuracy of  ship: 84% (843/1000)\n",
+    "Test Accuracy of truck: 76% (765/1000)\n",
+    "\n",
+    "Test Accuracy (Overall): 73% (7387/10000)"
    ]
   },
   {
@@ -500,6 +907,33 @@
     "For each class, compare the classification test accuracy of the initial model and the quantized model. Also give the overall test accuracy for both models."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1458a562",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quantization-aware training (eager mode): attach a qconfig, prepare, train, then convert.\n",
+    "model = Net_3Conv_3lin_Quant()\n",
+    "print(model)\n",
+    "\n",
+    "# A qconfig is required: without it prepare_qat() inserts no fake-quant modules and convert() leaves the model in float.\n",
+    "model.qconfig = torch.ao.quantization.get_default_qat_qconfig(torch.backends.quantized.engine)\n",
+    "\n",
+    "model.train()  # prepare_qat expects the model to be in training mode\n",
+    "model_prepared = torch.ao.quantization.prepare_qat(model)\n",
+    "training_loop(model_prepared)\n",
+    "\n",
+    "# Switch to eval mode before converting the fake-quantized modules to real quantized ones.\n",
+    "model_prepared.eval()\n",
+    "model_quantized = torch.ao.quantization.convert(model_prepared)\n",
+    "\n",
+    "evaluate(model_quantized)\n",
+    "print_size_of_model(model_quantized)\n",
+    "\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a0a34b90",
@@ -926,7 +1360,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.5 ('base')",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -940,12 +1374,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "9e3efbebb05da2d4a1968abe9a0645745f54b63feb7a85a514e4da0495be97eb"
-   }
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/final_accuracy_first_model.png b/final_accuracy_first_model.png
new file mode 100644
index 0000000000000000000000000000000000000000..f75b599e2f3cc7da8200291bb1fefe87dc2a4eb4
Binary files /dev/null and b/final_accuracy_first_model.png differ
diff --git a/perf_model_1.png b/perf_model_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..39000fab2d6965fc177cde2bf0b8e672e54ff26e
Binary files /dev/null and b/perf_model_1.png differ