diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb index 00e4fdc78c068248ca0742c64725d155b3681f0d..29d74ba658ec152359784553fbe04edac9886aca 100644 --- a/TD2 Deep Learning.ipynb +++ b/TD2 Deep Learning.ipynb @@ -33,10 +33,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "330a42f5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting torch\n", + " Downloading torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl (150.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.8/150.8 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting torchvision\n", + " Downloading torchvision-0.17.2-cp311-cp311-macosx_10_13_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting filelock\n", + " Downloading filelock-3.16.1-py3-none-any.whl (16 kB)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/youcefkessi/Library/Python/3.11/lib/python/site-packages (from torch) (4.12.2)\n", + "Collecting sympy\n", + " Downloading sympy-1.13.3-py3-none-any.whl (6.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting networkx\n", + " Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting jinja2\n", + " Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n", + "Collecting 
fsspec\n", + " Downloading fsspec-2024.10.0-py3-none-any.whl (179 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.6/179.6 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.11/site-packages (from torchvision) (1.24.3)\n", + "Collecting pillow!=8.3.*,>=5.3.0\n", + " Downloading pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl (3.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting MarkupSafe>=2.0\n", + " Downloading MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl (14 kB)\n", + "Collecting mpmath<1.4,>=1.1.0\n", + " Downloading mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.2/536.2 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: mpmath, sympy, pillow, networkx, MarkupSafe, fsspec, filelock, jinja2, torch, torchvision\n", + "Successfully installed MarkupSafe-3.0.2 filelock-3.16.1 fsspec-2024.10.0 jinja2-3.1.4 mpmath-1.3.0 networkx-3.4.2 pillow-11.0.0 sympy-1.13.3 torch-2.2.2 torchvision-0.17.2\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install torch torchvision" ] @@ -52,10 
+94,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b1950f0a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[ 0.1382, 1.9183, 0.7486, -0.0892, -2.3127, -0.4286, 0.9899, 0.3222,\n", + " -0.5292, 0.9530],\n", + " [-0.0399, -0.7634, -0.0202, -1.0406, 1.5965, 0.3020, 1.1578, -0.9197,\n", + " -0.8526, 0.7884],\n", + " [-0.7136, 0.5239, 0.6765, -1.0706, -0.2336, -0.3412, -0.6827, -0.3706,\n", + " -0.0637, 1.2051],\n", + " [-0.0992, -0.7170, 0.5943, -0.9738, 1.0573, 0.1999, 0.7378, 0.8637,\n", + " -0.9122, -0.6693],\n", + " [-0.2444, 0.0670, 0.5475, 0.4482, 0.1415, 0.2580, 0.5002, -0.6960,\n", + " -0.2279, -1.1721],\n", + " [-0.1691, -1.6504, -0.0027, 0.6255, 1.1239, -1.3190, 0.5333, -0.0546,\n", + " -0.8585, 1.7737],\n", + " [-0.4489, 0.6278, 0.1549, -0.8478, -0.2015, -0.0471, 1.5053, 0.4634,\n", + " 1.2918, -0.8495],\n", + " [-0.2506, 0.6510, 0.1217, 1.2895, 0.2822, -1.3349, -0.3043, -0.1663,\n", + " 0.7939, -0.8179],\n", + " [ 1.6928, -0.6658, 1.1638, -0.7703, -1.7227, -0.1917, 0.7573, -0.3033,\n", + " 0.1029, 0.1487],\n", + " [ 0.3055, -0.1415, -0.9022, 0.1677, 0.1224, -0.4547, 0.7145, -1.1752,\n", + " -1.5985, -1.0561],\n", + " [-0.2934, -0.8208, 0.1982, 1.0242, 0.2430, -1.7429, 0.0303, -1.4033,\n", + " 0.0555, -0.4570],\n", + " [-1.4168, -0.3432, -0.2924, 0.7476, -0.0764, -0.3595, 0.0453, -0.9992,\n", + " -1.5226, 0.6131],\n", + " [ 0.5159, 1.9472, -1.5132, 1.4804, 1.0425, 1.6437, -0.7786, 0.0994,\n", + " -0.6477, -0.0729],\n", + " [-0.2263, -0.2381, 0.7496, 0.9289, 0.2377, -0.1626, -0.9950, 0.1790,\n", + " 0.9828, -1.4597]])\n", + "AlexNet(\n", + " (features): Sequential(\n", + " (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n", + " (1): ReLU(inplace=True)\n", + " (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 
2))\n", + " (4): ReLU(inplace=True)\n", + " (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (7): ReLU(inplace=True)\n", + " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (9): ReLU(inplace=True)\n", + " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (11): ReLU(inplace=True)\n", + " (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " )\n", + " (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n", + " (classifier): Sequential(\n", + " (0): Dropout(p=0.5, inplace=False)\n", + " (1): Linear(in_features=9216, out_features=4096, bias=True)\n", + " (2): ReLU(inplace=True)\n", + " (3): Dropout(p=0.5, inplace=False)\n", + " (4): Linear(in_features=4096, out_features=4096, bias=True)\n", + " (5): ReLU(inplace=True)\n", + " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", + " )\n", + ")\n" + ] + } + ], "source": [ "import torch\n", "\n", @@ -95,10 +199,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "6e18f2fd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CUDA is not available. 
Training on CPU ...\n" + ] + } + ], "source": [ "import torch\n", "\n", @@ -121,10 +233,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "462666a2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.0%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting data/cifar-10-python.tar.gz to data\n", + "Files already downloaded and verified\n" + ] + } + ], "source": [ "import numpy as np\n", "from torchvision import datasets, transforms\n", @@ -193,10 +328,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "317bf070", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Net(\n", + " (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))\n", + " (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))\n", + " (fc1): Linear(in_features=400, out_features=120, bias=True)\n", + " (fc2): Linear(in_features=120, out_features=84, bias=True)\n", + " (fc3): Linear(in_features=84, out_features=10, bias=True)\n", + ")\n" + ] + } + ], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", @@ -242,10 +392,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4b53f229", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 0 \tTraining Loss: 43.554762 \tValidation Loss: 38.943442\n", + "Validation loss decreased (inf --> 38.943442). 
Saving model ...\n", + "Epoch: 1 \tTraining Loss: 34.872761 \tValidation Loss: 32.725993\n", + "Validation loss decreased (38.943442 --> 32.725993). Saving model ...\n", + "Epoch: 2 \tTraining Loss: 30.709429 \tValidation Loss: 29.464331\n", + "Validation loss decreased (32.725993 --> 29.464331). Saving model ...\n", + "Epoch: 3 \tTraining Loss: 28.485044 \tValidation Loss: 28.414431\n", + "Validation loss decreased (29.464331 --> 28.414431). Saving model ...\n", + "Epoch: 4 \tTraining Loss: 26.830099 \tValidation Loss: 26.364449\n", + "Validation loss decreased (28.414431 --> 26.364449). Saving model ...\n", + "Epoch: 5 \tTraining Loss: 25.469640 \tValidation Loss: 26.365138\n", + "Epoch: 6 \tTraining Loss: 24.304000 \tValidation Loss: 24.570287\n", + "Validation loss decreased (26.364449 --> 24.570287). Saving model ...\n", + "Epoch: 7 \tTraining Loss: 23.247810 \tValidation Loss: 23.820702\n", + "Validation loss decreased (24.570287 --> 23.820702). Saving model ...\n", + "Epoch: 8 \tTraining Loss: 22.400491 \tValidation Loss: 23.790809\n", + "Validation loss decreased (23.820702 --> 23.790809). Saving model ...\n", + "Epoch: 9 \tTraining Loss: 21.533881 \tValidation Loss: 23.234611\n", + "Validation loss decreased (23.790809 --> 23.234611). Saving model ...\n", + "Epoch: 10 \tTraining Loss: 20.718155 \tValidation Loss: 23.801281\n", + "Epoch: 11 \tTraining Loss: 19.985109 \tValidation Loss: 22.480761\n", + "Validation loss decreased (23.234611 --> 22.480761). Saving model ...\n", + "Epoch: 12 \tTraining Loss: 19.240459 \tValidation Loss: 22.505575\n", + "Epoch: 13 \tTraining Loss: 18.551414 \tValidation Loss: 22.415395\n", + "Validation loss decreased (22.480761 --> 22.415395). Saving model ...\n", + "Epoch: 14 \tTraining Loss: 17.908126 \tValidation Loss: 21.999896\n", + "Validation loss decreased (22.415395 --> 21.999896). 
Saving model ...\n", + "Epoch: 15 \tTraining Loss: 17.369180 \tValidation Loss: 22.417102\n", + "Epoch: 16 \tTraining Loss: 16.773355 \tValidation Loss: 22.192228\n", + "Epoch: 17 \tTraining Loss: 16.176232 \tValidation Loss: 21.979470\n", + "Validation loss decreased (21.999896 --> 21.979470). Saving model ...\n", + "Epoch: 18 \tTraining Loss: 15.706412 \tValidation Loss: 22.365677\n", + "Epoch: 19 \tTraining Loss: 15.168052 \tValidation Loss: 22.861357\n", + "Epoch: 20 \tTraining Loss: 14.587228 \tValidation Loss: 23.245590\n", + "Epoch: 21 \tTraining Loss: 14.180318 \tValidation Loss: 24.290684\n", + "Epoch: 22 \tTraining Loss: 13.679101 \tValidation Loss: 23.016075\n", + "Epoch: 23 \tTraining Loss: 13.202287 \tValidation Loss: 23.932568\n", + "Epoch: 24 \tTraining Loss: 12.708487 \tValidation Loss: 25.336128\n", + "Epoch: 25 \tTraining Loss: 12.288698 \tValidation Loss: 25.429984\n", + "Epoch: 26 \tTraining Loss: 11.907723 \tValidation Loss: 25.383014\n", + "Epoch: 27 \tTraining Loss: 11.487043 \tValidation Loss: 25.936636\n", + "Epoch: 28 \tTraining Loss: 11.127419 \tValidation Loss: 28.218890\n", + "Epoch: 29 \tTraining Loss: 10.661623 \tValidation Loss: 27.170219\n" + ] + } + ], "source": [ "import torch.optim as optim\n", "\n", @@ -324,6 +524,18 @@ "Does overfit occur? If so, do an early stopping." 
] }, + { + "cell_type": "markdown", + "id": "668a6413", + "metadata": {}, + "source": [ + "Yes, overfitting occurs.\n", + "- Training loss steadily decreases throughout the epochs, reaching very low values.\n", + "- Validation loss decreases initially but starts to increase after epoch 17, suggesting that the model is overfitting to the training data and not generalizing well to the validation data.\n", + "\n", + "Another indicator of this overfitting is the divergence between the training and validation losses after a certain point: the validation loss starts to increase while the training loss continues to decrease." + ] + }, { "cell_type": "code", "execution_count": null, @@ -926,7 +1138,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.5 ('base')", + "display_name": "Python 3.11.3 64-bit", "language": "python", "name": "python3" }, @@ -940,11 +1152,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.11.3" }, "vscode": { "interpreter": { - "hash": "9e3efbebb05da2d4a1968abe9a0645745f54b63feb7a85a514e4da0495be97eb" + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" } } },