From 58dab060dca87b44440ef7623a313a200897f62f Mon Sep 17 00:00:00 2001
From: HeberArteagaJ <heberarteagajimenez@gmail.com>
Date: Thu, 21 Nov 2024 10:39:43 +0100
Subject: [PATCH] Exercise 1

---
 TD2 Deep Learning.ipynb | 842 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 813 insertions(+), 29 deletions(-)

diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb
index 00e4fdc..68483a7 100644
--- a/TD2 Deep Learning.ipynb	
+++ b/TD2 Deep Learning.ipynb	
@@ -33,10 +33,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "330a42f5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: torch in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (2.2.0)\n",
+      "Collecting torchvision\n",
+      "  Downloading torchvision-0.20.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.1 kB)\n",
+      "Requirement already satisfied: filelock in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (3.13.1)\n",
+      "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (4.9.0)\n",
+      "Requirement already satisfied: sympy in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (1.12)\n",
+      "Requirement already satisfied: networkx in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (3.2.1)\n",
+      "Requirement already satisfied: jinja2 in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (3.1.3)\n",
+      "Requirement already satisfied: fsspec in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torch) (2024.2.0)\n",
+      "Requirement already satisfied: numpy in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torchvision) (1.26.3)\n",
+      "Collecting torch\n",
+      "  Downloading torch-2.5.1-cp311-none-macosx_11_0_arm64.whl.metadata (28 kB)\n",
+      "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from torchvision) (10.2.0)\n",
+      "Collecting sympy==1.13.1 (from torch)\n",
+      "  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n",
+      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from sympy==1.13.1->torch) (1.3.0)\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /Users/heber/.pyenv/versions/3.11.7/lib/python3.11/site-packages (from jinja2->torch) (2.1.5)\n",
+      "Downloading torchvision-0.20.1-cp311-cp311-macosx_11_0_arm64.whl (1.8 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m827.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading torch-2.5.1-cp311-none-macosx_11_0_arm64.whl (63.9 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 MB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
+      "\u001b[?25hDownloading sympy-1.13.1-py3-none-any.whl (6.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n",
+      "\u001b[?25hInstalling collected packages: sympy, torch, torchvision\n",
+      "  Attempting uninstall: sympy\n",
+      "    Found existing installation: sympy 1.12\n",
+      "    Uninstalling sympy-1.12:\n",
+      "      Successfully uninstalled sympy-1.12\n",
+      "  Attempting uninstall: torch\n",
+      "    Found existing installation: torch 2.2.0\n",
+      "    Uninstalling torch-2.2.0:\n",
+      "      Successfully uninstalled torch-2.2.0\n",
+      "Successfully installed sympy-1.13.1 torch-2.5.1 torchvision-0.20.1\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
     "%pip install torch torchvision"
    ]
@@ -52,10 +96,72 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "b1950f0a",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[-0.4614,  0.2167,  1.3662,  0.5457,  2.7665,  0.8728, -0.1837,  0.0607,\n",
+      "          1.5946, -0.7726],\n",
+      "        [-0.8952,  0.7103, -0.7606,  0.9257, -0.1401,  0.5907,  0.7204,  1.3177,\n",
+      "         -0.4342,  0.4527],\n",
+      "        [ 0.7967,  0.1907, -0.5346,  1.4139, -0.5380, -2.1966,  0.4751,  1.4743,\n",
+      "          1.2449, -0.8389],\n",
+      "        [ 0.0833,  0.5977, -0.7399, -0.4702, -0.6887,  1.1328, -1.1584,  0.3544,\n",
+      "          1.0611, -0.0325],\n",
+      "        [ 0.5764, -0.5985, -1.0803, -0.7565, -1.0020,  1.7249, -0.6647,  0.7847,\n",
+      "          1.7402,  0.8243],\n",
+      "        [-0.9695,  0.5117,  1.9237,  1.7299,  1.0193,  0.3211, -0.5839,  0.5866,\n",
+      "          1.0019, -0.2681],\n",
+      "        [-0.4172, -2.3619, -1.1206, -0.7292,  0.9231, -0.3644,  0.6110,  1.3185,\n",
+      "          1.2674, -1.5235],\n",
+      "        [ 0.2213, -0.5554, -0.4785,  0.9106,  0.1333,  1.1237,  0.2859, -1.6737,\n",
+      "         -0.8616, -2.5445],\n",
+      "        [ 0.2351,  1.3325,  0.1848,  0.1473,  1.3133, -0.7523,  0.6736,  1.8610,\n",
+      "         -0.1847,  1.0223],\n",
+      "        [-0.6824, -0.0298, -0.1910,  1.4017, -1.9937,  0.4087,  0.0165,  1.7551,\n",
+      "         -0.6690, -0.7425],\n",
+      "        [-1.3005, -0.5498, -1.3494, -1.2090,  0.3210,  0.7386,  0.5926, -0.6941,\n",
+      "         -0.1688, -0.6065],\n",
+      "        [ 0.4044,  0.6994, -0.9141, -0.3529,  1.0734, -0.9639,  0.0657, -0.2253,\n",
+      "          0.3391,  0.5039],\n",
+      "        [-2.1911,  1.6130, -0.7344, -1.0796, -0.3465, -0.9285, -0.5405, -0.0072,\n",
+      "         -0.1058, -1.7597],\n",
+      "        [-1.4770,  0.3449,  0.6489,  1.7304, -0.0802, -0.0332, -0.2949,  0.2265,\n",
+      "         -0.7456,  0.8549]])\n",
+      "AlexNet(\n",
+      "  (features): Sequential(\n",
+      "    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n",
+      "    (1): ReLU(inplace=True)\n",
+      "    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n",
+      "    (4): ReLU(inplace=True)\n",
+      "    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (7): ReLU(inplace=True)\n",
+      "    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (9): ReLU(inplace=True)\n",
+      "    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "    (11): ReLU(inplace=True)\n",
+      "    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  )\n",
+      "  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n",
+      "  (classifier): Sequential(\n",
+      "    (0): Dropout(p=0.5, inplace=False)\n",
+      "    (1): Linear(in_features=9216, out_features=4096, bias=True)\n",
+      "    (2): ReLU(inplace=True)\n",
+      "    (3): Dropout(p=0.5, inplace=False)\n",
+      "    (4): Linear(in_features=4096, out_features=4096, bias=True)\n",
+      "    (5): ReLU(inplace=True)\n",
+      "    (6): Linear(in_features=4096, out_features=1000, bias=True)\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
    "source": [
     "import torch\n",
     "\n",
@@ -95,10 +201,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "6e18f2fd",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CUDA is not available.  Training on CPU ...\n"
+     ]
+    }
+   ],
    "source": [
     "import torch\n",
     "\n",
@@ -111,6 +225,29 @@
     "    print(\"CUDA is available!  Training on GPU ...\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "abb4553c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([1.], device='mps:0')\n"
+     ]
+    }
+   ],
+   "source": [
+    "if torch.backends.mps.is_available():\n",
+    "    mps_device = torch.device(\"mps\")\n",
+    "    x = torch.ones(1, device=mps_device)\n",
+    "    print (x)\n",
+    "else:\n",
+    "    print (\"MPS device not found.\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "5cf214eb",
@@ -121,10 +258,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "462666a2",
+   "execution_count": 5,
+   "id": "711b0b8e",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data.sampler import SubsetRandomSampler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "462666a2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files already downloaded and verified\n",
+      "Files already downloaded and verified\n"
+     ]
+    }
+   ],
    "source": [
     "import numpy as np\n",
     "from torchvision import datasets, transforms\n",
@@ -193,10 +351,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "id": "317bf070",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Net(\n",
+      "  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n",
+      "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n",
+      "  (fc1): Linear(in_features=400, out_features=120, bias=True)\n",
+      "  (fc2): Linear(in_features=120, out_features=84, bias=True)\n",
+      "  (fc3): Linear(in_features=84, out_features=10, bias=True)\n",
+      ")\n"
+     ]
+    }
+   ],
    "source": [
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
@@ -242,10 +415,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "id": "4b53f229",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 0 \tTraining Loss: 28.707199 \tValidation Loss: 28.363214\n",
+      "Validation loss decreased (inf --> 28.363214).  Saving model ...\n",
+      "Epoch: 1 \tTraining Loss: 27.053440 \tValidation Loss: 26.921309\n",
+      "Validation loss decreased (28.363214 --> 26.921309).  Saving model ...\n",
+      "Epoch: 2 \tTraining Loss: 25.798181 \tValidation Loss: 25.484369\n",
+      "Validation loss decreased (26.921309 --> 25.484369).  Saving model ...\n",
+      "Epoch: 3 \tTraining Loss: 24.616021 \tValidation Loss: 25.825257\n",
+      "Epoch: 4 \tTraining Loss: 23.607140 \tValidation Loss: 24.406983\n",
+      "Validation loss decreased (25.484369 --> 24.406983).  Saving model ...\n",
+      "Epoch: 5 \tTraining Loss: 22.641223 \tValidation Loss: 23.463277\n",
+      "Validation loss decreased (24.406983 --> 23.463277).  Saving model ...\n",
+      "Epoch: 6 \tTraining Loss: 21.727461 \tValidation Loss: 23.323754\n",
+      "Validation loss decreased (23.463277 --> 23.323754).  Saving model ...\n",
+      "Epoch: 7 \tTraining Loss: 20.908013 \tValidation Loss: 22.815489\n",
+      "Validation loss decreased (23.323754 --> 22.815489).  Saving model ...\n",
+      "Epoch: 8 \tTraining Loss: 20.072570 \tValidation Loss: 22.468899\n",
+      "Validation loss decreased (22.815489 --> 22.468899).  Saving model ...\n",
+      "Epoch: 9 \tTraining Loss: 19.337123 \tValidation Loss: 23.307148\n",
+      "Epoch: 10 \tTraining Loss: 18.578279 \tValidation Loss: 22.322720\n",
+      "Validation loss decreased (22.468899 --> 22.322720).  Saving model ...\n",
+      "Epoch: 11 \tTraining Loss: 17.925301 \tValidation Loss: 22.491466\n",
+      "Epoch: 12 \tTraining Loss: 17.266396 \tValidation Loss: 22.145613\n",
+      "Validation loss decreased (22.322720 --> 22.145613).  Saving model ...\n",
+      "Epoch: 13 \tTraining Loss: 16.644972 \tValidation Loss: 21.923327\n",
+      "Validation loss decreased (22.145613 --> 21.923327).  Saving model ...\n",
+      "Epoch: 14 \tTraining Loss: 16.097757 \tValidation Loss: 22.242258\n",
+      "Epoch: 15 \tTraining Loss: 15.522903 \tValidation Loss: 22.269535\n",
+      "Epoch: 16 \tTraining Loss: 14.930308 \tValidation Loss: 23.073589\n",
+      "Epoch: 17 \tTraining Loss: 14.374154 \tValidation Loss: 23.190186\n",
+      "Epoch: 18 \tTraining Loss: 13.829007 \tValidation Loss: 23.638800\n",
+      "Epoch: 19 \tTraining Loss: 13.414001 \tValidation Loss: 25.147587\n",
+      "Epoch: 20 \tTraining Loss: 12.890743 \tValidation Loss: 24.385583\n",
+      "Epoch: 21 \tTraining Loss: 12.456227 \tValidation Loss: 24.933902\n",
+      "Epoch: 22 \tTraining Loss: 11.993389 \tValidation Loss: 25.289021\n",
+      "Epoch: 23 \tTraining Loss: 11.565563 \tValidation Loss: 26.004760\n",
+      "Epoch: 24 \tTraining Loss: 11.188692 \tValidation Loss: 26.451757\n",
+      "Epoch: 25 \tTraining Loss: 10.716678 \tValidation Loss: 27.236794\n",
+      "Epoch: 26 \tTraining Loss: 10.315807 \tValidation Loss: 27.493770\n",
+      "Epoch: 27 \tTraining Loss: 9.975283 \tValidation Loss: 27.571290\n",
+      "Epoch: 28 \tTraining Loss: 9.440035 \tValidation Loss: 29.006522\n",
+      "Epoch: 29 \tTraining Loss: 9.220511 \tValidation Loss: 29.190469\n"
+     ]
+    }
+   ],
    "source": [
     "import torch.optim as optim\n",
     "\n",
@@ -324,18 +545,179 @@
     "Does overfit occur? If so, do an early stopping."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "4e567158",
+   "metadata": {},
+   "source": [
+    "Yes, overfitting occurs. It becomes evident after Epoch 13: from that point on the Validation Loss stops decreasing and starts to oscillate and then rise, while the Training Loss keeps decreasing.\n",
+    "This indicates that the model is fitting the training data too closely and failing to generalize to the validation data.\n",
+    "With early stopping, training should stop around Epoch 13, where the Validation Loss reaches its minimum value of 21.923327 in the training log above. Continuing beyond this point does not improve validation performance and only increases overfitting."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
+   "id": "11952c52",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 0 \tTraining Loss: 8.891932 \tValidation Loss: 30.875338\n",
+      "Validation loss decreased (inf --> 30.875338).  Saving model ...\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[35], line 35\u001b[0m\n\u001b[1;32m     33\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(output, target)\n\u001b[1;32m     34\u001b[0m \u001b[38;5;66;03m# Backward pass: compute gradient of the loss with respect to model parameters\u001b[39;00m\n\u001b[0;32m---> 35\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     36\u001b[0m \u001b[38;5;66;03m# Perform a single optimization step (parameter update)\u001b[39;00m\n\u001b[1;32m     37\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mstep()\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/_tensor.py:581\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    571\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m    572\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m    573\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m    574\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    579\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m    580\u001b[0m     )\n\u001b[0;32m--> 581\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    582\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m    583\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/autograd/__init__.py:347\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    342\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m    344\u001b[0m \u001b[38;5;66;03m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[1;32m    345\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m    346\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 347\u001b[0m \u001b[43m_engine_run_backward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    348\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    349\u001b[0m \u001b[43m    \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    350\u001b[0m \u001b[43m    \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    351\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    352\u001b[0m \u001b[43m    \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    353\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    354\u001b[0m \u001b[43m    \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    355\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/autograd/graph.py:825\u001b[0m, in \u001b[0;36m_engine_run_backward\u001b[0;34m(t_outputs, *args, **kwargs)\u001b[0m\n\u001b[1;32m    823\u001b[0m     unregister_hooks \u001b[38;5;241m=\u001b[39m _register_logging_hooks_on_whole_graph(t_outputs)\n\u001b[1;32m    824\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 825\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m    826\u001b[0m \u001b[43m        \u001b[49m\u001b[43mt_outputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    827\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[1;32m    828\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m    829\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m attach_logging_hooks:\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# EARLY STOP\n",
+    "import torch.optim as optim\n",
+    "\n",
+    "min_epochs = 10\n",
+    "patience = 3 # Nb of epochs to wait after no improvement\n",
+    "epochs_no_improve = 0\n",
+    "\n",
+    "\n",
+    "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+    "optimizer = optim.SGD(model.parameters(), lr=0.01)  # specify optimizer\n",
+    "\n",
+    "n_epochs = 30  # number of epochs to train the model\n",
+    "valid_loss_list = []  # list to store validation loss to visualize\n",
+    "train_loss_list = []  # list to store trainloss to visualize\n",
+    "valid_loss_min = np.Inf  # track change in validation loss\n",
+    "\n",
+    "for epoch in range(n_epochs):\n",
+    "    # Keep track of training and validation loss\n",
+    "    train_loss = 0.0\n",
+    "    valid_loss = 0.0\n",
+    "\n",
+    "    # Train the model\n",
+    "    model.train()\n",
+    "    for data, target in train_loader:\n",
+    "        # Move tensors to GPU if CUDA is available\n",
+    "        if train_on_gpu:\n",
+    "            data, target = data.cuda(), target.cuda()\n",
+    "        # Clear the gradients of all optimized variables\n",
+    "        optimizer.zero_grad()\n",
+    "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+    "        output = model(data)\n",
+    "        # Calculate the batch loss\n",
+    "        loss = criterion(output, target)\n",
+    "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+    "        loss.backward()\n",
+    "        # Perform a single optimization step (parameter update)\n",
+    "        optimizer.step()\n",
+    "        # Update training loss\n",
+    "        train_loss += loss.item() * data.size(0)\n",
+    "\n",
+    "    # Validate the model\n",
+    "    model.eval()\n",
+    "    for data, target in valid_loader:\n",
+    "        # Move tensors to GPU if CUDA is available\n",
+    "        if train_on_gpu:\n",
+    "            data, target = data.cuda(), target.cuda()\n",
+    "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+    "        output = model(data)\n",
+    "        # Calculate the batch loss\n",
+    "        loss = criterion(output, target)\n",
+    "        # Update average validation loss\n",
+    "        valid_loss += loss.item() * data.size(0)\n",
+    "\n",
+    "    # Calculate average losses\n",
+    "    train_loss = train_loss / len(train_loader)\n",
+    "    valid_loss = valid_loss / len(valid_loader)\n",
+    "    train_loss_list.append(train_loss)\n",
+    "    valid_loss_list.append(valid_loss)\n",
+    "\n",
+    "    # Print training/validation statistics\n",
+    "    print(\n",
+    "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n",
+    "            epoch, train_loss, valid_loss\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    # Save model if validation loss has decreased\n",
+    "    if valid_loss <= valid_loss_min:\n",
+    "        print(\n",
+    "            \"Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...\".format(\n",
+    "                valid_loss_min, valid_loss\n",
+    "            )\n",
+    "        )\n",
+    "        torch.save(model.state_dict(), \"model_cifar_1_early_stop.pt\")\n",
+    "        valid_loss_min = valid_loss\n",
+    "        epochs_no_improve = 0\n",
+    "    elif epoch >= min_epochs:\n",
+    "        epochs_no_improve += 1\n",
+    "        if epochs_no_improve >= patience:\n",
+    "            print(f\"Validation loss increased for {patience} times consecutives. Applying Early Stop.\")\n",
+    "            break\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
    "id": "d39df818",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.plot(range(len(train_loss_list)), train_loss_list)\n",
+    "plt.xlabel(\"Epoch\")\n",
+    "plt.ylabel(\"Train Loss\")\n",
+    "plt.title(\"Performance of Model 1\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "2111dfe9",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "import matplotlib.pyplot as plt\n",
     "\n",
-    "plt.plot(range(n_epochs), train_loss_list)\n",
+    "plt.plot(range(len(valid_loss_list)), valid_loss_list)\n",
     "plt.xlabel(\"Epoch\")\n",
-    "plt.ylabel(\"Loss\")\n",
+    "plt.ylabel(\"Validation Loss\")\n",
     "plt.title(\"Performance of Model 1\")\n",
     "plt.show()"
    ]
@@ -353,9 +735,39 @@
    "execution_count": null,
    "id": "e93efdfc",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/qb/94v41qkx157gvjjjv1rchcr00000gn/T/ipykernel_25820/3291884398.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test Loss: 21.811477\n",
+      "\n",
+      "Test Accuracy of airplane: 71% (716/1000)\n",
+      "Test Accuracy of automobile: 75% (750/1000)\n",
+      "Test Accuracy of  bird: 55% (558/1000)\n",
+      "Test Accuracy of   cat: 44% (442/1000)\n",
+      "Test Accuracy of  deer: 60% (604/1000)\n",
+      "Test Accuracy of   dog: 52% (521/1000)\n",
+      "Test Accuracy of  frog: 64% (644/1000)\n",
+      "Test Accuracy of horse: 58% (588/1000)\n",
+      "Test Accuracy of  ship: 74% (746/1000)\n",
+      "Test Accuracy of truck: 68% (681/1000)\n",
+      "\n",
+      "Test Accuracy (Overall): 62% (6250/10000)\n"
+     ]
+    }
+   ],
    "source": [
-    "model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n",
+    "# model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n",
+    "model.load_state_dict(torch.load(\"./model_cifar_1_early_stop.pt\"))\n",
     "\n",
     "# track test loss\n",
     "test_loss = 0.0\n",
@@ -434,6 +846,337 @@
     "Compare the results obtained with this new network to those obtained previously."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "8b67c2c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "\n",
+    "# define the CNN architecture\n",
+    "\n",
+    "class NewNet(nn.Module):\n",
+    "    \"\"\"CNN with three (3x3 conv -> ReLU -> 2x2 max-pool) stages and three linear layers.\"\"\"\n",
+    "\n",
+    "    def __init__(self, dropout_value=0.5, input_size=32, num_classes=10):\n",
+    "        \"\"\"Create the layers.\n",
+    "\n",
+    "        Args:\n",
+    "            dropout_value: drop probability of the dropout applied after fc1 and fc2.\n",
+    "            input_size: side length of the square input images (default 32); must be >= 8.\n",
+    "            num_classes: number of output logits (default 10).\n",
+    "\n",
+    "        Raises:\n",
+    "            ValueError: if input_size is smaller than 8 (nothing would be left after pooling).\n",
+    "        \"\"\"\n",
+    "        super().__init__()\n",
+    "        if input_size < 8:\n",
+    "            raise ValueError(\"input_size must be at least 8\")\n",
+    "        # 3x3 kernels with padding=1 keep the spatial size; only the pooling shrinks it\n",
+    "        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)\n",
+    "        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)\n",
+    "        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)\n",
+    "        self.pool = nn.MaxPool2d(kernel_size=2)\n",
+    "        self.dropout = nn.Dropout(p=dropout_value)\n",
+    "        # forward() pools three times, so each spatial side is floor-divided by 8\n",
+    "        side = input_size // 8\n",
+    "        self.fc1 = nn.Linear(in_features=64 * side * side, out_features=512)\n",
+    "        self.fc2 = nn.Linear(in_features=512, out_features=64)\n",
+    "        self.fc3 = nn.Linear(in_features=64, out_features=num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"Return the raw class logits, shape (batch, num_classes), for a batch of images.\"\"\"\n",
+    "        x = self.pool(F.relu(self.conv1(x)))\n",
+    "        x = self.pool(F.relu(self.conv2(x)))\n",
+    "        x = self.pool(F.relu(self.conv3(x)))\n",
+    "        # flatten everything except the batch dimension before the linear layers\n",
+    "        x = x.view(x.size(0), -1)\n",
+    "        x = self.dropout(F.relu(self.fc1(x)))\n",
+    "        x = self.dropout(F.relu(self.fc2(x)))\n",
+    "        return self.fc3(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3cc6cc8a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NewNet(\n",
+      "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
+      "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
+      "  (dropout): Dropout(p=0.5, inplace=False)\n",
+      "  (fc1): Linear(in_features=1024, out_features=512, bias=True)\n",
+      "  (fc2): Linear(in_features=512, out_features=64, bias=True)\n",
+      "  (fc3): Linear(in_features=64, out_features=10, bias=True)\n",
+      ")\n",
+      "Epoch: 0 \tTraining Loss: 44.934554 \tValidation Loss: 40.292926\n",
+      "Validation loss decreased (inf --> 40.292926).  Saving model ...\n",
+      "Epoch: 1 \tTraining Loss: 38.547384 \tValidation Loss: 34.307230\n",
+      "Validation loss decreased (40.292926 --> 34.307230).  Saving model ...\n",
+      "Epoch: 2 \tTraining Loss: 34.167031 \tValidation Loss: 30.783441\n",
+      "Validation loss decreased (34.307230 --> 30.783441).  Saving model ...\n",
+      "Epoch: 3 \tTraining Loss: 31.514744 \tValidation Loss: 29.177271\n",
+      "Validation loss decreased (30.783441 --> 29.177271).  Saving model ...\n",
+      "Epoch: 4 \tTraining Loss: 29.490232 \tValidation Loss: 26.770098\n",
+      "Validation loss decreased (29.177271 --> 26.770098).  Saving model ...\n",
+      "Epoch: 5 \tTraining Loss: 27.982251 \tValidation Loss: 25.774428\n",
+      "Validation loss decreased (26.770098 --> 25.774428).  Saving model ...\n",
+      "Epoch: 6 \tTraining Loss: 26.515079 \tValidation Loss: 24.038370\n",
+      "Validation loss decreased (25.774428 --> 24.038370).  Saving model ...\n",
+      "Epoch: 7 \tTraining Loss: 25.259680 \tValidation Loss: 23.620053\n",
+      "Validation loss decreased (24.038370 --> 23.620053).  Saving model ...\n",
+      "Epoch: 8 \tTraining Loss: 23.969766 \tValidation Loss: 22.249926\n",
+      "Validation loss decreased (23.620053 --> 22.249926).  Saving model ...\n",
+      "Epoch: 9 \tTraining Loss: 23.044149 \tValidation Loss: 21.061266\n",
+      "Validation loss decreased (22.249926 --> 21.061266).  Saving model ...\n",
+      "Epoch: 10 \tTraining Loss: 21.929328 \tValidation Loss: 20.193573\n",
+      "Validation loss decreased (21.061266 --> 20.193573).  Saving model ...\n",
+      "Epoch: 11 \tTraining Loss: 21.162510 \tValidation Loss: 19.769918\n",
+      "Validation loss decreased (20.193573 --> 19.769918).  Saving model ...\n",
+      "Epoch: 12 \tTraining Loss: 20.163602 \tValidation Loss: 19.290062\n",
+      "Validation loss decreased (19.769918 --> 19.290062).  Saving model ...\n",
+      "Epoch: 13 \tTraining Loss: 19.370121 \tValidation Loss: 18.626375\n",
+      "Validation loss decreased (19.290062 --> 18.626375).  Saving model ...\n",
+      "Epoch: 14 \tTraining Loss: 18.558041 \tValidation Loss: 18.075628\n",
+      "Validation loss decreased (18.626375 --> 18.075628).  Saving model ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Train NewNet with SGD: checkpoint on every validation improvement, stop early otherwise.\n",
+    "# create a complete CNN\n",
+    "new_model = NewNet()\n",
+    "print(new_model)\n",
+    "# move tensors to GPU if CUDA is available\n",
+    "if train_on_gpu:\n",
+    "    new_model.cuda()\n",
+    "\n",
+    "import torch.optim as optim\n",
+    "\n",
+    "min_epochs = 10  # non-improving epochs are only counted from this epoch index onward\n",
+    "patience = 3  # number of counted non-improving epochs tolerated before stopping\n",
+    "epochs_no_improve = 0\n",
+    "\n",
+    "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+    "optimizer = optim.SGD(new_model.parameters(), lr=0.01)  # specify optimizer\n",
+    "\n",
+    "n_epochs = 30  # maximum number of epochs to train the model\n",
+    "valid_loss_list = []  # list to store validation loss to visualize\n",
+    "train_loss_list = []  # list to store training loss to visualize\n",
+    "valid_loss_min = np.inf  # best validation loss so far (the np.Inf alias is gone in NumPy 2.0)\n",
+    "\n",
+    "for epoch in range(n_epochs):\n",
+    "    # Keep track of training and validation loss\n",
+    "    train_loss = 0.0\n",
+    "    valid_loss = 0.0\n",
+    "\n",
+    "    # Train the model (train mode enables dropout)\n",
+    "    new_model.train()\n",
+    "    for data, target in train_loader:\n",
+    "        # Move tensors to GPU if CUDA is available\n",
+    "        if train_on_gpu:\n",
+    "            data, target = data.cuda(), target.cuda()\n",
+    "        # Clear the gradients of all optimized variables\n",
+    "        optimizer.zero_grad()\n",
+    "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+    "        output = new_model(data)\n",
+    "        # Calculate the batch loss\n",
+    "        loss = criterion(output, target)\n",
+    "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+    "        loss.backward()\n",
+    "        # Perform a single optimization step (parameter update)\n",
+    "        optimizer.step()\n",
+    "        # Accumulate the summed loss of the batch (mean loss * batch size)\n",
+    "        train_loss += loss.item() * data.size(0)\n",
+    "\n",
+    "    # Validate the model (eval mode disables dropout); no gradients are needed here\n",
+    "    new_model.eval()\n",
+    "    with torch.no_grad():\n",
+    "        for data, target in valid_loader:\n",
+    "            # Move tensors to GPU if CUDA is available\n",
+    "            if train_on_gpu:\n",
+    "                data, target = data.cuda(), target.cuda()\n",
+    "            # Forward pass: compute predicted outputs by passing inputs to the model\n",
+    "            output = new_model(data)\n",
+    "            # Calculate the batch loss\n",
+    "            loss = criterion(output, target)\n",
+    "            # Accumulate the summed loss of the batch\n",
+    "            valid_loss += loss.item() * data.size(0)\n",
+    "\n",
+    "    # NOTE(review): the sums are per-sample but are divided by the number of batches, so the\n",
+    "    # reported values are per-batch losses; kept as is so results stay comparable across cells.\n",
+    "    train_loss = train_loss / len(train_loader)\n",
+    "    valid_loss = valid_loss / len(valid_loader)\n",
+    "    train_loss_list.append(train_loss)\n",
+    "    valid_loss_list.append(valid_loss)\n",
+    "\n",
+    "    # Print training/validation statistics\n",
+    "    print(\n",
+    "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n",
+    "            epoch, train_loss, valid_loss\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    # Save model if validation loss has decreased; otherwise count towards early stopping\n",
+    "    if valid_loss <= valid_loss_min:\n",
+    "        print(\n",
+    "            \"Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...\".format(\n",
+    "                valid_loss_min, valid_loss\n",
+    "            )\n",
+    "        )\n",
+    "        torch.save(new_model.state_dict(), \"model_cifar_2.pt\")\n",
+    "        valid_loss_min = valid_loss\n",
+    "        epochs_no_improve = 0\n",
+    "    elif epoch >= min_epochs:\n",
+    "        epochs_no_improve += 1\n",
+    "        if epochs_no_improve >= patience:\n",
+    "            print(f\"Validation loss increased for {patience} times consecutives. Applying Early Stop.\")\n",
+    "            break\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "97355006",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/qb/94v41qkx157gvjjjv1rchcr00000gn/T/ipykernel_32008/3634208260.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  model.load_state_dict(torch.load(\"./model_cifar_2.pt\"))\n"
+     ]
+    },
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: './model_cifar_2.pt'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[31], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model\u001b[38;5;241m.\u001b[39mload_state_dict(\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./model_cifar_2.pt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# track test loss\u001b[39;00m\n\u001b[1;32m      4\u001b[0m test_loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.0\u001b[39m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/serialization.py:1319\u001b[0m, in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)\u001b[0m\n\u001b[1;32m   1316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m pickle_load_args\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m   1317\u001b[0m     pickle_load_args[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1319\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43m_open_file_like\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m opened_file:\n\u001b[1;32m   1320\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_zipfile(opened_file):\n\u001b[1;32m   1321\u001b[0m         \u001b[38;5;66;03m# The zipfile reader is going to advance the current file position.\u001b[39;00m\n\u001b[1;32m   1322\u001b[0m         \u001b[38;5;66;03m# If we want to actually tail call to torch.jit.load, we need to\u001b[39;00m\n\u001b[1;32m   1323\u001b[0m         \u001b[38;5;66;03m# reset back to the original position.\u001b[39;00m\n\u001b[1;32m   1324\u001b[0m         orig_position \u001b[38;5;241m=\u001b[39m opened_file\u001b[38;5;241m.\u001b[39mtell()\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/serialization.py:659\u001b[0m, in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m    657\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_open_file_like\u001b[39m(name_or_buffer, mode):\n\u001b[1;32m    658\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_path(name_or_buffer):\n\u001b[0;32m--> 659\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_open_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    660\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    661\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m mode:\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/serialization.py:640\u001b[0m, in \u001b[0;36m_open_file.__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m    639\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, mode):\n\u001b[0;32m--> 640\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './model_cifar_2.pt'"
+     ]
+    }
+   ],
+   "source": [
+    "# Evaluate the second network (new_model) from the checkpoint written by its training cell.\n",
+    "new_model.load_state_dict(torch.load(\"./model_cifar_2.pt\", weights_only=True))\n",
+    "\n",
+    "# track test loss\n",
+    "test_loss = 0.0\n",
+    "class_correct = [0.0] * 10\n",
+    "class_total = [0.0] * 10\n",
+    "\n",
+    "new_model.eval()\n",
+    "# iterate over test data; gradients are not needed for evaluation\n",
+    "with torch.no_grad():\n",
+    "    for data, target in test_loader:\n",
+    "        # move tensors to GPU if CUDA is available\n",
+    "        if train_on_gpu:\n",
+    "            data, target = data.cuda(), target.cuda()\n",
+    "        # forward pass: compute predicted outputs by passing inputs to the model\n",
+    "        output = new_model(data)\n",
+    "        # calculate the batch loss\n",
+    "        loss = criterion(output, target)\n",
+    "        # accumulate the summed loss of the batch (mean loss * batch size)\n",
+    "        test_loss += loss.item() * data.size(0)\n",
+    "        # predicted class = index of the largest output value\n",
+    "        _, pred = torch.max(output, 1)\n",
+    "        # compare predictions to true label (.cpu() is a no-op for CPU tensors)\n",
+    "        correct = pred.eq(target.view_as(pred)).cpu().numpy()\n",
+    "        # calculate test accuracy for each object class; iterate over the actual batch\n",
+    "        # length so that a smaller final batch does not raise an IndexError\n",
+    "        for i in range(target.size(0)):\n",
+    "            label = target[i].item()\n",
+    "            class_correct[label] += correct[i].item()\n",
+    "            class_total[label] += 1\n",
+    "\n",
+    "# NOTE(review): the summed per-sample loss is divided by the number of batches, so this is a\n",
+    "# per-batch loss; kept as is so the value stays comparable with the other cells.\n",
+    "test_loss = test_loss / len(test_loader)\n",
+    "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+    "\n",
+    "# per-class accuracy report\n",
+    "for i in range(10):\n",
+    "    if class_total[i] > 0:\n",
+    "        print(\n",
+    "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+    "            % (\n",
+    "                classes[i],\n",
+    "                100 * class_correct[i] / class_total[i],\n",
+    "                np.sum(class_correct[i]),\n",
+    "                np.sum(class_total[i]),\n",
+    "            )\n",
+    "        )\n",
+    "    else:\n",
+    "        print(\"Test Accuracy of %5s: N/A (no training examples)\" % (classes[i]))\n",
+    "\n",
+    "print(\n",
+    "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+    "    % (\n",
+    "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+    "        np.sum(class_correct),\n",
+    "        np.sum(class_total),\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6245b27f",
+   "metadata": {},
+   "source": [
+    "# Test Accuracy: Model 1 vs. Model 2\n",
+    "\n",
+    "## Test Accuracy Model 1: \n",
+    "* Test Loss: 21.811477\n",
+    "\n",
+    "* Test Accuracy of airplane: 71% (716/1000)\n",
+    "* Test Accuracy of automobile: 75% (750/1000)\n",
+    "* Test Accuracy of  bird: 55% (558/1000)\n",
+    "* Test Accuracy of   cat: 44% (442/1000)\n",
+    "* Test Accuracy of  deer: 60% (604/1000)\n",
+    "* Test Accuracy of   dog: 52% (521/1000)\n",
+    "* Test Accuracy of  frog: 64% (644/1000)\n",
+    "* Test Accuracy of horse: 58% (588/1000)\n",
+    "* Test Accuracy of  ship: 74% (746/1000)\n",
+    "* Test Accuracy of truck: 68% (681/1000)\n",
+    "\n",
+    "* Test Accuracy (Overall): 62% (6250/10000)\n",
+    "\n",
+    "\n",
+    "## Test Accuracy Model 2:\n",
+    "* Test Loss: 16.239906\n",
+    "\n",
+    "* Test Accuracy of airplane: 78% (784/1000)\n",
+    "* Test Accuracy of automobile: 88% (889/1000)\n",
+    "* Test Accuracy of  bird: 61% (618/1000)\n",
+    "* Test Accuracy of   cat: 61% (615/1000)\n",
+    "* Test Accuracy of  deer: 66% (662/1000)\n",
+    "* Test Accuracy of   dog: 50% (509/1000)\n",
+    "* Test Accuracy of  frog: 82% (823/1000)\n",
+    "* Test Accuracy of horse: 73% (732/1000)\n",
+    "* Test Accuracy of  ship: 86% (862/1000)\n",
+    "* Test Accuracy of truck: 75% (751/1000)\n",
+    "\n",
+    "* Test Accuracy (Overall): 72% (7245/10000)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "bc381cf4",
@@ -451,10 +1194,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "id": "ef623c26",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "model:  fp32  \t Size (KB): 2330.946\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "2330946"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import os\n",
     "\n",
@@ -483,15 +1244,43 @@
    "execution_count": null,
    "id": "c4c65d4b",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "RuntimeError",
+     "evalue": "Didn't find engine for operation quantized::linear_prepack NoQEngine",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[30], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mquantization\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m quantized_model \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquantization\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquantize_dynamic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mqint8\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m print_size_of_model(quantized_model, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mint8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/quantization/quantize.py:564\u001b[0m, in \u001b[0;36mquantize_dynamic\u001b[0;34m(model, qconfig_spec, dtype, mapping, inplace)\u001b[0m\n\u001b[1;32m    562\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n\u001b[1;32m    563\u001b[0m propagate_qconfig_(model, qconfig_spec)\n\u001b[0;32m--> 564\u001b[0m \u001b[43mconvert\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapping\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m    565\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/quantization/quantize.py:659\u001b[0m, in \u001b[0;36mconvert\u001b[0;34m(module, mapping, inplace, remove_qconfig, is_reference, convert_custom_config_dict, use_precomputed_fake_quant)\u001b[0m\n\u001b[1;32m    657\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m inplace:\n\u001b[1;32m    658\u001b[0m     module \u001b[38;5;241m=\u001b[39m copy\u001b[38;5;241m.\u001b[39mdeepcopy(module)\n\u001b[0;32m--> 659\u001b[0m \u001b[43m_convert\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    660\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    661\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmapping\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    662\u001b[0m \u001b[43m    \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    663\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_reference\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_reference\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    664\u001b[0m \u001b[43m    \u001b[49m\u001b[43mconvert_custom_config_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconvert_custom_config_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    665\u001b[0m \u001b[43m    \u001b[49m\u001b[43muse_precomputed_fake_quant\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_precomputed_fake_quant\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    666\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    667\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remove_qconfig:\n\u001b[1;32m    668\u001b[0m     _remove_qconfig(module)\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/quantization/quantize.py:724\u001b[0m, in \u001b[0;36m_convert\u001b[0;34m(module, mapping, inplace, is_reference, convert_custom_config_dict, use_precomputed_fake_quant)\u001b[0m\n\u001b[1;32m    712\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m    713\u001b[0m         \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mod, _FusedModule)\n\u001b[1;32m    714\u001b[0m         \u001b[38;5;129;01mand\u001b[39;00m type_before_parametrizations(mod) \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m custom_module_class_mapping\n\u001b[1;32m    715\u001b[0m     ):\n\u001b[1;32m    716\u001b[0m         _convert(\n\u001b[1;32m    717\u001b[0m             mod,\n\u001b[1;32m    718\u001b[0m             mapping,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    722\u001b[0m             use_precomputed_fake_quant\u001b[38;5;241m=\u001b[39muse_precomputed_fake_quant,\n\u001b[1;32m    723\u001b[0m         )\n\u001b[0;32m--> 724\u001b[0m     reassign[name] \u001b[38;5;241m=\u001b[39m \u001b[43mswap_module\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    725\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmapping\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcustom_module_class_mapping\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_precomputed_fake_quant\u001b[49m\n\u001b[1;32m    726\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    728\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m reassign\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m    729\u001b[0m     module\u001b[38;5;241m.\u001b[39m_modules[key] \u001b[38;5;241m=\u001b[39m value\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/quantization/quantize.py:766\u001b[0m, in \u001b[0;36mswap_module\u001b[0;34m(mod, mapping, custom_module_class_mapping, use_precomputed_fake_quant)\u001b[0m\n\u001b[1;32m    764\u001b[0m sig \u001b[38;5;241m=\u001b[39m inspect\u001b[38;5;241m.\u001b[39msignature(qmod\u001b[38;5;241m.\u001b[39mfrom_float)\n\u001b[1;32m    765\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_precomputed_fake_quant\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m sig\u001b[38;5;241m.\u001b[39mparameters:\n\u001b[0;32m--> 766\u001b[0m     new_mod \u001b[38;5;241m=\u001b[39m \u001b[43mqmod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_float\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    767\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_precomputed_fake_quant\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_precomputed_fake_quant\u001b[49m\n\u001b[1;32m    768\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    769\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    770\u001b[0m     new_mod \u001b[38;5;241m=\u001b[39m qmod\u001b[38;5;241m.\u001b[39mfrom_float(mod)\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py:145\u001b[0m, in \u001b[0;36mLinear.from_float\u001b[0;34m(cls, mod, use_precomputed_fake_quant)\u001b[0m\n\u001b[1;32m    141\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    142\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m    143\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported dtype specified for dynamic quantized Linear!\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    144\u001b[0m     )\n\u001b[0;32m--> 145\u001b[0m qlinear \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43min_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mout_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    146\u001b[0m qlinear\u001b[38;5;241m.\u001b[39mset_weight_bias(qweight, mod\u001b[38;5;241m.\u001b[39mbias)\n\u001b[1;32m    147\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m qlinear\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/linear.py:42\u001b[0m, in \u001b[0;36mLinear.__init__\u001b[0;34m(self, in_features, out_features, bias_, dtype)\u001b[0m\n\u001b[1;32m     41\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, in_features, out_features, bias_\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mqint8):\n\u001b[0;32m---> 42\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43min_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_features\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     43\u001b[0m     \u001b[38;5;66;03m# We don't muck around with buffers or attributes or anything here\u001b[39;00m\n\u001b[1;32m     44\u001b[0m     \u001b[38;5;66;03m# to keep the module simple. *everything* is simply a Python attribute.\u001b[39;00m\n\u001b[1;32m     45\u001b[0m     \u001b[38;5;66;03m# Serialization logic is explicitly handled in the below serialization and\u001b[39;00m\n\u001b[1;32m     46\u001b[0m     \u001b[38;5;66;03m# deserialization modules\u001b[39;00m\n\u001b[1;32m     47\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mversion \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m4\u001b[39m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/linear.py:172\u001b[0m, in \u001b[0;36mLinear.__init__\u001b[0;34m(self, in_features, out_features, bias_, dtype)\u001b[0m\n\u001b[1;32m    169\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    170\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported dtype specified for quantized Linear!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_packed_params \u001b[38;5;241m=\u001b[39m \u001b[43mLinearPackedParams\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    173\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_packed_params\u001b[38;5;241m.\u001b[39mset_weight_bias(qweight, bias)\n\u001b[1;32m    174\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscale \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1.0\u001b[39m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/linear.py:31\u001b[0m, in \u001b[0;36mLinearPackedParams.__init__\u001b[0;34m(self, dtype)\u001b[0m\n\u001b[1;32m     29\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mfloat16:\n\u001b[1;32m     30\u001b[0m     wq \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mzeros([\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m], dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mfloat)\n\u001b[0;32m---> 31\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mset_weight_bias\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/linear.py:38\u001b[0m, in \u001b[0;36mLinearPackedParams.set_weight_bias\u001b[0;34m(self, weight, bias)\u001b[0m\n\u001b[1;32m     33\u001b[0m \u001b[38;5;129m@torch\u001b[39m\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mexport\n\u001b[1;32m     34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mset_weight_bias\u001b[39m(\n\u001b[1;32m     35\u001b[0m     \u001b[38;5;28mself\u001b[39m, weight: torch\u001b[38;5;241m.\u001b[39mTensor, bias: Optional[torch\u001b[38;5;241m.\u001b[39mTensor]\n\u001b[1;32m     36\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     37\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mqint8:\n\u001b[0;32m---> 38\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_packed_params \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquantized\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear_prepack\u001b[49m\u001b[43m(\u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     39\u001b[0m     \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m torch\u001b[38;5;241m.\u001b[39mfloat16:\n\u001b[1;32m     40\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_packed_params \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mops\u001b[38;5;241m.\u001b[39mquantized\u001b[38;5;241m.\u001b[39mlinear_prepack_fp16(weight, bias)\n",
+      "File \u001b[0;32m~/.pyenv/versions/3.11.7/lib/python3.11/site-packages/torch/_ops.py:1116\u001b[0m, in \u001b[0;36mOpOverloadPacket.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_has_torchbind_op_overload \u001b[38;5;129;01mand\u001b[39;00m _must_dispatch_in_python(args, kwargs):\n\u001b[1;32m   1115\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m _call_overload_packet_from_python(\u001b[38;5;28mself\u001b[39m, args, kwargs)\n\u001b[0;32m-> 1116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_op\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mRuntimeError\u001b[0m: Didn't find engine for operation quantized::linear_prepack NoQEngine"
+     ]
+    }
+   ],
    "source": [
     "import torch.quantization\n",
     "\n",
-    "\n",
+    "torch.backends.quantized.engine = 'qnnpack'\n",
     "quantized_model = torch.quantization.quantize_dynamic(model, dtype=torch.qint8)\n",
     "print_size_of_model(quantized_model, \"int8\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "063d405c",
+   "metadata": {},
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "id": "7b108e17",
@@ -926,7 +1715,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.5 ('base')",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -940,12 +1729,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "9e3efbebb05da2d4a1968abe9a0645745f54b63feb7a85a514e4da0495be97eb"
-   }
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,
-- 
GitLab