diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb index 00e4fdc78c068248ca0742c64725d155b3681f0d..0b832d1bf9d9158033a621e289afdf64141c4f52 100644 --- a/TD2 Deep Learning.ipynb +++ b/TD2 Deep Learning.ipynb @@ -52,10 +52,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "b1950f0a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[ 0.1159, -0.1632, 0.6574, 1.5902, -0.4352, -1.1418, 0.8810, 0.0847,\n", + " -2.4290, -0.0911],\n", + " [ 0.3400, -1.9606, -0.0214, -0.1179, -0.3917, -0.3592, 0.5251, 0.4169,\n", + " -0.8085, -0.2057],\n", + " [-0.7488, 0.7606, 0.1129, -2.6223, -0.5739, -0.4979, 2.0387, 0.1628,\n", + " 1.1597, -0.9275],\n", + " [-1.5324, 1.4420, 0.9108, 0.4737, 0.3852, -1.1974, 1.7244, 1.3268,\n", + " 1.4552, 0.5241],\n", + " [-0.3818, -0.4960, -1.5574, -0.8755, 1.2589, 0.8939, 0.0385, -2.5047,\n", + " 0.6804, -0.1951],\n", + " [ 0.1988, 0.9232, -1.3031, 1.8143, 0.0756, 1.2082, -1.1921, 0.0647,\n", + " 0.1529, 0.4644],\n", + " [ 1.8262, 0.6831, -0.1683, -0.8331, -0.5271, -0.2069, 0.5703, 1.7226,\n", + " -0.6655, -0.4297],\n", + " [-0.0630, -0.2216, 2.2132, -0.8788, 2.8345, -0.0534, -1.7918, -0.6061,\n", + " -0.2461, 0.4126],\n", + " [ 0.3832, -0.2473, -1.1898, 2.3250, 0.1655, -0.4416, -0.4937, -0.1714,\n", + " 0.6682, -0.7186],\n", + " [-0.5843, 1.7539, 0.4247, 0.5102, -1.2161, 0.2732, 1.8955, 1.5722,\n", + " 0.9527, 0.2717],\n", + " [-1.2976, 0.2779, -0.8085, 0.0037, -1.4008, -1.3840, 0.1210, 0.5056,\n", + " 0.6006, -1.5492],\n", + " [-0.1415, -0.8489, 0.3045, 2.3843, 1.4306, -0.5467, -0.2279, 0.2920,\n", + " 1.5270, -1.5247],\n", + " [-0.8661, -0.3661, 0.3478, -0.5955, 1.0730, -2.1341, -0.8818, 0.2842,\n", + " 0.8046, 0.4630],\n", + " [-0.1986, 1.3981, -0.3965, -0.6231, 2.5136, 0.1703, -1.0520, -0.4539,\n", + " -1.8835, -0.1314]])\n", + "AlexNet(\n", + " (features): Sequential(\n", + " (0): Conv2d(3, 64, kernel_size=(11, 11), 
stride=(4, 4), padding=(2, 2))\n", + " (1): ReLU(inplace=True)\n", + " (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n", + " (4): ReLU(inplace=True)\n", + " (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (7): ReLU(inplace=True)\n", + " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (9): ReLU(inplace=True)\n", + " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (11): ReLU(inplace=True)\n", + " (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " )\n", + " (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n", + " (classifier): Sequential(\n", + " (0): Dropout(p=0.5, inplace=False)\n", + " (1): Linear(in_features=9216, out_features=4096, bias=True)\n", + " (2): ReLU(inplace=True)\n", + " (3): Dropout(p=0.5, inplace=False)\n", + " (4): Linear(in_features=4096, out_features=4096, bias=True)\n", + " (5): ReLU(inplace=True)\n", + " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", + " )\n", + ")\n" + ] + } + ], "source": [ "import torch\n", "\n", @@ -95,10 +157,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "6e18f2fd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CUDA is not available. 
Training on CPU ...\n" + ] + } + ], "source": [ "import torch\n", "\n", @@ -121,10 +191,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "462666a2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data\\cifar-10-python.tar.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.0%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data\\cifar-10-python.tar.gz to data\n", + "Files already downloaded and verified\n" + ] + } + ], "source": [ "import numpy as np\n", "from torchvision import datasets, transforms\n", @@ -196,7 +289,22 @@ "execution_count": null, "id": "317bf070", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Net(\n", + " (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n", + " (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n", + " (fc1): Linear(in_features=400, out_features=120, bias=True)\n", + " (fc2): Linear(in_features=120, out_features=84, bias=True)\n", + " (fc3): Linear(in_features=84, out_features=10, bias=True)\n", + ")\n" + ] + } + ], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", @@ -232,6 +340,125 @@ " model.cuda()" ] }, + { + "cell_type": "markdown", + "id": "02ad19e0", + "metadata": {}, + "source": [ + "Creating the model to answer question 1: " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "9f3145ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Net_3Conv_3lin(\n", + " (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 
1))\n", + " (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (fc1): Linear(in_features=1024, out_features=512, bias=True)\n", + " (fc2): Linear(in_features=512, out_features=64, bias=True)\n", + " (fc3): Linear(in_features=64, out_features=10, bias=True)\n", + " (dropout): Dropout(p=0.3, inplace=False)\n", + ")\n" + ] + } + ], + "source": [ + "\n", + "\n", + "class Net_3Conv_3lin(nn.Module):\n", + " def __init__(self):\n", + " super(Net_3Conv_3lin, self).__init__()\n", + " self.conv1 = nn.Conv2d(kernel_size=3 ,padding=1 ,in_channels=3, out_channels =16)\n", + " self.conv2 = nn.Conv2d(kernel_size=3 ,padding=1,in_channels=16, out_channels =32 )\n", + " self.conv3 = nn.Conv2d(kernel_size=3 ,padding=1,in_channels=32, out_channels =64 )\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.fc1 = nn.Linear(1024,512)\n", + " self.fc2 = nn.Linear(512,64)\n", + " self.fc3 = nn.Linear(64,10)\n", + " self.p = 0.3\n", + " self.dropout = nn.Dropout(self.p) \n", + "\n", + " def forward(self, x):\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x = self.pool(F.relu(self.conv2(x)))\n", + " x = self.pool(F.relu(self.conv3(x)))\n", + " x = x.view(-1, 64 * 4 * 4)\n", + " x = self.dropout(F.relu(self.fc1(x)))\n", + " x = self.dropout(F.relu(self.fc2(x)))\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "\n", + "# create a complete CNN\n", + "model = Net_3Conv_3lin()\n", + "print(model)\n", + "# move tensors to GPU if CUDA is available\n", + "if train_on_gpu:\n", + " model.cuda()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a model for quantization-aware training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71bb37b5", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "class Net_3Conv_3lin_Quant(nn.Module):\n", + " def __init__(self):\n", + " super(Net_3Conv_3lin_Quant, 
self).__init__()\n", + " self.quant = torch.ao.quantization.QuantStub()\n", + " self.conv1 = nn.Conv2d(kernel_size=3 ,padding=1 ,in_channels=3, out_channels =16)\n", + " self.conv2 = nn.Conv2d(kernel_size=3 ,padding=1,in_channels=16, out_channels =32 )\n", + " self.conv3 = nn.Conv2d(kernel_size=3 ,padding=1,in_channels=32, out_channels =64 )\n", + " self.pool = nn.MaxPool2d(2, 2)\n", + " self.fc1 = nn.Linear(1024,512)\n", + " self.fc2 = nn.Linear(512,64)\n", + " self.fc3 = nn.Linear(64,10)\n", + " self.p = 0.3\n", + " self.dropout = nn.Dropout(self.p) \n", + "\n", + " self.dequant = torch.ao.quantization.DeQuantStub()\n", + "\n", + " def forward(self, x):\n", + " x = self.quant(x)\n", + " x = self.pool(F.relu(self.conv1(x)))\n", + " x = self.pool(F.relu(self.conv2(x)))\n", + " x = self.pool(F.relu(self.conv3(x)))\n", + " x = x.view(-1, 64 * 4 * 4)\n", + " x = self.dropout(F.relu(self.fc1(x)))\n", + " x = self.dropout(F.relu(self.fc2(x)))\n", + " x = self.fc3(x)\n", + " x = self.dequant(x)\n", + " return x\n", + "\n", + "\n", + "# create a complete CNN\n", + "model = Net_3Conv_3lin_Quant()\n", + "print(model)\n", + "# move tensors to GPU if CUDA is available\n", + "if train_on_gpu:\n", + " model.cuda()" + ] + }, { "cell_type": "markdown", "id": "a2dc4974", @@ -242,10 +469,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "4b53f229", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 0 \tTraining Loss: 45.872737 \tValidation Loss: 44.489702\n", + "Validation loss decreased (inf --> 44.489702). Saving model ...\n", + "Epoch: 1 \tTraining Loss: 40.061390 \tValidation Loss: 36.100894\n", + "Validation loss decreased (44.489702 --> 36.100894). Saving model ...\n", + "Epoch: 2 \tTraining Loss: 34.138645 \tValidation Loss: 31.113222\n", + "Validation loss decreased (36.100894 --> 31.113222). 
Saving model ...\n", + "Epoch: 3 \tTraining Loss: 30.507358 \tValidation Loss: 28.677294\n", + "Validation loss decreased (31.113222 --> 28.677294). Saving model ...\n", + "Epoch: 4 \tTraining Loss: 28.486533 \tValidation Loss: 26.945912\n", + "Validation loss decreased (28.677294 --> 26.945912). Saving model ...\n", + "Epoch: 5 \tTraining Loss: 26.688108 \tValidation Loss: 25.087968\n", + "Validation loss decreased (26.945912 --> 25.087968). Saving model ...\n", + "Epoch: 6 \tTraining Loss: 24.921677 \tValidation Loss: 23.867951\n", + "Validation loss decreased (25.087968 --> 23.867951). Saving model ...\n", + "Epoch: 7 \tTraining Loss: 23.352516 \tValidation Loss: 21.780911\n", + "Validation loss decreased (23.867951 --> 21.780911). Saving model ...\n", + "Epoch: 8 \tTraining Loss: 21.794870 \tValidation Loss: 21.096160\n", + "Validation loss decreased (21.780911 --> 21.096160). Saving model ...\n", + "Epoch: 9 \tTraining Loss: 20.547996 \tValidation Loss: 19.826372\n", + "Validation loss decreased (21.096160 --> 19.826372). Saving model ...\n", + "Epoch: 10 \tTraining Loss: 19.401682 \tValidation Loss: 19.596204\n", + "Validation loss decreased (19.826372 --> 19.596204). Saving model ...\n", + "Epoch: 11 \tTraining Loss: 18.340276 \tValidation Loss: 18.632437\n", + "Validation loss decreased (19.596204 --> 18.632437). Saving model ...\n", + "Epoch: 12 \tTraining Loss: 17.266555 \tValidation Loss: 17.758480\n", + "Validation loss decreased (18.632437 --> 17.758480). Saving model ...\n", + "Epoch: 13 \tTraining Loss: 16.353216 \tValidation Loss: 17.932480\n", + "Epoch: 14 \tTraining Loss: 15.507940 \tValidation Loss: 16.795444\n", + "Validation loss decreased (17.758480 --> 16.795444). Saving model ...\n", + "Epoch: 15 \tTraining Loss: 14.657860 \tValidation Loss: 16.382975\n", + "Validation loss decreased (16.795444 --> 16.382975). 
Saving model ...\n", + "Epoch: 16 \tTraining Loss: 13.861092 \tValidation Loss: 16.670121\n", + "Epoch: 17 \tTraining Loss: 12.984836 \tValidation Loss: 16.962824\n", + "Epoch: 18 \tTraining Loss: 12.257837 \tValidation Loss: 16.400703\n", + "Epoch: 19 \tTraining Loss: 11.436899 \tValidation Loss: 16.729391\n", + "Epoch: 20 \tTraining Loss: 10.915463 \tValidation Loss: 16.299635\n", + "Validation loss decreased (16.382975 --> 16.299635). Saving model ...\n", + "Epoch: 21 \tTraining Loss: 10.233074 \tValidation Loss: 16.345074\n", + "Epoch: 22 \tTraining Loss: 9.549847 \tValidation Loss: 16.705205\n", + "Epoch: 23 \tTraining Loss: 8.865565 \tValidation Loss: 16.583533\n", + "Epoch: 24 \tTraining Loss: 8.210216 \tValidation Loss: 17.112398\n", + "Epoch: 25 \tTraining Loss: 7.781697 \tValidation Loss: 17.192360\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[34], line 41\u001b[0m\n\u001b[0;32m 39\u001b[0m data, target \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mcuda(), target\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[0;32m 40\u001b[0m \u001b[38;5;66;03m# Forward pass: compute predicted outputs by passing inputs to the model\u001b[39;00m\n\u001b[1;32m---> 41\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 42\u001b[0m \u001b[38;5;66;03m# Calculate the batch loss\u001b[39;00m\n\u001b[0;32m 43\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(output, target)\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, 
**kwargs)\u001b[0m\n\u001b[0;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
_global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "Cell \u001b[1;32mIn[33], line 16\u001b[0m, in \u001b[0;36mNet_3Conv_3lin.forward\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m 15\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool(F\u001b[38;5;241m.\u001b[39mrelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv1(x)))\n\u001b[1;32m---> 16\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpool\u001b[49m\u001b[43m(\u001b[49m\u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrelu\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 17\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool(F\u001b[38;5;241m.\u001b[39mrelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv3(x)))\n\u001b[0;32m 18\u001b[0m x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mview(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m64\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m4\u001b[39m \u001b[38;5;241m*\u001b[39m 
\u001b[38;5;241m4\u001b[39m)\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 
1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\modules\\pooling.py:213\u001b[0m, in \u001b[0;36mMaxPool2d.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m 212\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor):\n\u001b[1;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_pool2d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 216\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 217\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mceil_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mceil_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_indices\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreturn_indices\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\_jit_internal.py:624\u001b[0m, in \u001b[0;36mboolean_dispatch.<locals>.fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 622\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m if_true(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 623\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mif_false\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\xxpod\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\torch\\nn\\functional.py:830\u001b[0m, in \u001b[0;36m_max_pool2d\u001b[1;34m(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices)\u001b[0m\n\u001b[0;32m 828\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stride \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 829\u001b[0m stride \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mannotate(List[\u001b[38;5;28mint\u001b[39m], [])\n\u001b[1;32m--> 830\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_pool2d\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkernel_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mceil_mode\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ "import torch.optim as optim\n", "\n", @@ -254,7 +549,7 @@ "\n", "n_epochs = 30 # number of epochs to train the model\n", "train_loss_list = [] # list to store loss to visualize\n", - "valid_loss_min = np.Inf # track change in validation loss\n", + "valid_loss_min = np.inf # track change in validation loss\n", "\n", "for epoch in range(n_epochs):\n", " # Keep track of training and validation loss\n", @@ -326,17 +621,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "d39df818", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Net_3Conv_3lin(\n", + " (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (fc1): Linear(in_features=1024, out_features=512, bias=True)\n", + " (fc2): Linear(in_features=512, out_features=64, 
bias=True)\n", + " (fc3): Linear(in_features=64, out_features=10, bias=True)\n", + " (dropout): Dropout(p=0.3, inplace=False)\n", + ")\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import matplotlib.pyplot as plt\n", - "\n", - "plt.plot(range(n_epochs), train_loss_list)\n", + "print(model)\n", + "plt.plot(range(len(train_loss_list)), train_loss_list)\n", "plt.xlabel(\"Epoch\")\n", "plt.ylabel(\"Loss\")\n", - "plt.title(\"Performance of Model 1\")\n", + "plt.title(\"Performance of Model 2\")\n", "plt.show()" ] }, @@ -350,10 +672,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "e93efdfc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\xxpod\\AppData\\Local\\Temp\\ipykernel_18828\\3291884398.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", + " model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Loss: 16.123924\n", + "\n", + "Test Accuracy of airplane: 81% (810/1000)\n", + "Test Accuracy of automobile: 85% (855/1000)\n", + "Test Accuracy of bird: 63% (633/1000)\n", + "Test Accuracy of cat: 52% (525/1000)\n", + "Test Accuracy of deer: 69% (695/1000)\n", + "Test Accuracy of dog: 71% (717/1000)\n", + "Test Accuracy of frog: 77% (772/1000)\n", + "Test Accuracy of horse: 77% (772/1000)\n", + "Test Accuracy of ship: 84% (843/1000)\n", + "Test Accuracy of truck: 76% (765/1000)\n", + "\n", + "Test Accuracy (Overall): 73% (7387/10000)\n" + ] + } + ], "source": [ "model.load_state_dict(torch.load(\"./model_cifar.pt\"))\n", "\n", @@ -431,7 +782,63 @@ "- The first fully connected layer will have an output size of 512.\n", "- The second fully connected layer will have an output size of 64.\n", "\n", - "Compare the results obtained with this new network to those obtained previously." + "Compare the results obtained with this new network to those obtained previously.\n", + "\n", + "ANSWER: The model is built above and is named Net_3Conv_3lin.\n", + "\n", + "\n", + "Results for the previous model:\n", + "\n", + "We observe overfitting from about the 10th epoch: the validation loss plateaued at around 22 while the training loss kept decreasing towards 10, as can be seen from the training logs:\n", + "\n", + "Epoch: 7 \tTraining Loss: 23.183946 \tValidation Loss: 24.331222\n", + "Validation loss decreased (25.691083 --> 24.331222). Saving model ...\n", + "\n", + "Epoch: 8 \tTraining Loss: 22.215979 \tValidation Loss: 23.632853\n", + "Validation loss decreased (24.331222 --> 23.632853). Saving model ...\n", + "\n", + "Epoch: 9 \tTraining Loss: 21.408623 \tValidation Loss: 23.475442\n", + "Validation loss decreased (23.632853 --> 23.475442). 
Saving model ...\n", + "\n", + "Epoch: 10 \tTraining Loss: 20.637072 \tValidation Loss: 23.639358\n", + "\n", + "Epoch: 11 \tTraining Loss: 19.877338 \tValidation Loss: 22.408472\n", + "Validation loss decreased (23.475442 --> 22.408472). Saving model ...\n", + "\n", + "Epoch: 12 \tTraining Loss: 19.188079 \tValidation Loss: 23.296445\n", + "\n", + "Epoch: 13 \tTraining Loss: 18.647543 \tValidation Loss: 22.897815\n", + "\n", + "Epoch: 14 \tTraining Loss: 17.989626 \tValidation Loss: 22.755968\n", + "\n", + "The performance is as follows:\n", + "\n", + "and the final accuracies were:\n", + "\n", + "\n", + "\n", + "SECOND MODEL:\n", + "\n", + "For the second model, the validation loss goes lower, though in addition to the architectural changes there are also simply more weights, and the model takes longer to train.\n", + "\n", + "We achieve a validation loss of around 16, and the model is still improving after a larger number of epochs (20 vs. 10).\n", + "\n", + "\n", + "Here are the final accuracies:\n", + "Test Loss: 16.123924\n", + "\n", + "Test Accuracy of airplane: 81% (810/1000)\n", + "Test Accuracy of automobile: 85% (855/1000)\n", + "Test Accuracy of bird: 63% (633/1000)\n", + "Test Accuracy of cat: 52% (525/1000)\n", + "Test Accuracy of deer: 69% (695/1000)\n", + "Test Accuracy of dog: 71% (717/1000)\n", + "Test Accuracy of frog: 77% (772/1000)\n", + "Test Accuracy of horse: 77% (772/1000)\n", + "Test Accuracy of ship: 84% (843/1000)\n", + "Test Accuracy of truck: 76% (765/1000)\n", + "\n", + "Test Accuracy (Overall): 73% (7387/10000)" ] }, { @@ -500,6 +907,33 @@ "For each class, compare the classification test accuracy of the initial model and the quantized model. Also give the overall test accuracy for both models." 
] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1458a562", + "metadata": {}, + "outputs": [], + "source": [ + "model = Net_3Conv_3lin_Quant()\n", + "print(model)\n", + "\n", + "\n", + "#prepare for fusion\n", + "model.eval()\n", + "#model.qconfig = torch.ao.quantization.get_default_qat_config('86')\n", + "#model_fused = torch.ao.quantization.fuse_modules(model,[['conv', 'relu',]])\n", + "\n", + "model.train()\n", + "model_prepared = torch.ao.quantization.prepare_qat(model)\n", + "training_loop(model_prepared)\n", + "model_prepared.eval()\n", + "model_quantized = torch.ao.quantization.convert(model_prepared)\n", + "\n", + "evaluate(model_quantized)\n", + "print_size_of_model(model_quantized)\n", + "\n" + ] + }, { "cell_type": "markdown", "id": "a0a34b90", @@ -926,7 +1360,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.5 ('base')", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -940,12 +1374,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "vscode": { - "interpreter": { - "hash": "9e3efbebb05da2d4a1968abe9a0645745f54b63feb7a85a514e4da0495be97eb" - } + "version": "3.12.2" } }, "nbformat": 4, diff --git a/final_accuracy_first_model.png b/final_accuracy_first_model.png new file mode 100644 index 0000000000000000000000000000000000000000..f75b599e2f3cc7da8200291bb1fefe87dc2a4eb4 Binary files /dev/null and b/final_accuracy_first_model.png differ diff --git a/perf_model_1.png b/perf_model_1.png new file mode 100644 index 0000000000000000000000000000000000000000..39000fab2d6965fc177cde2bf0b8e672e54ff26e Binary files /dev/null and b/perf_model_1.png differ