diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb index 88c2cb7de2b9c94d56a889ecd22b390827780408..8ad507cea32f75710f1756b6c233086fcc1898f9 100644 --- a/TD2 Deep Learning.ipynb +++ b/TD2 Deep Learning.ipynb @@ -1087,21 +1087,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## New network" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "CNN definition" + "CNN definition following the structure required above" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Net_1(\n", + "  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + "  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + "  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + "  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + "  (fc1): Linear(in_features=1024, out_features=512, bias=True)\n", + "  (fc2): Linear(in_features=512, out_features=64, bias=True)\n", + "  (fc3): Linear(in_features=64, out_features=10, bias=True)\n", + "  (dropout): Dropout(p=0.5, inplace=False)\n", + ")\n" + ] + } + ], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", @@ -1162,6 +1172,148 @@ " model.cuda()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loss function and training loop using the SGD (Stochastic Gradient Descent) optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 0 \tTraining Loss: 45.824805 \tValidation Loss: 44.098061\n", + "Validation loss decreased (inf --> 44.098061). Saving model ...\n", + "Epoch: 1 \tTraining Loss: 41.092585 \tValidation Loss: 36.748989\n", + "Validation loss decreased (44.098061 --> 36.748989). Saving model ...\n", + "Epoch: 2 \tTraining Loss: 35.776831 \tValidation Loss: 32.416112\n", + "Validation loss decreased (36.748989 --> 32.416112). Saving model ...\n", + "Epoch: 3 \tTraining Loss: 32.982180 \tValidation Loss: 29.739034\n", + "Validation loss decreased (32.416112 --> 29.739034). Saving model ...\n", + "Epoch: 4 \tTraining Loss: 30.876129 \tValidation Loss: 28.481162\n", + "Validation loss decreased (29.739034 --> 28.481162). Saving model ...\n", + "Epoch: 5 \tTraining Loss: 29.058467 \tValidation Loss: 25.692209\n", + "Validation loss decreased (28.481162 --> 25.692209). Saving model ...\n", + "Epoch: 6 \tTraining Loss: 27.521015 \tValidation Loss: 24.506301\n", + "Validation loss decreased (25.692209 --> 24.506301). Saving model ...\n", + "Epoch: 7 \tTraining Loss: 26.234757 \tValidation Loss: 23.046333\n", + "Validation loss decreased (24.506301 --> 23.046333). Saving model ...\n", + "Epoch: 8 \tTraining Loss: 25.024110 \tValidation Loss: 22.182746\n", + "Validation loss decreased (23.046333 --> 22.182746). Saving model ...\n", + "Epoch: 9 \tTraining Loss: 23.719521 \tValidation Loss: 21.154988\n", + "Validation loss decreased (22.182746 --> 21.154988). Saving model ...\n", + "Epoch: 10 \tTraining Loss: 22.675286 \tValidation Loss: 20.148329\n", + "Validation loss decreased (21.154988 --> 20.148329). Saving model ...\n", + "Epoch: 11 \tTraining Loss: 21.529691 \tValidation Loss: 19.110659\n", + "Validation loss decreased (20.148329 --> 19.110659). 
Saving model ...\n", + "Epoch: 12 \tTraining Loss: 20.730257 \tValidation Loss: 18.273050\n", + "Validation loss decreased (19.110659 --> 18.273050). Saving model ...\n", + "Epoch: 13 \tTraining Loss: 19.809760 \tValidation Loss: 17.508739\n", + "Validation loss decreased (18.273050 --> 17.508739). Saving model ...\n", + "Epoch: 14 \tTraining Loss: 18.948443 \tValidation Loss: 17.371757\n", + "Validation loss decreased (17.508739 --> 17.371757). Saving model ...\n", + "Epoch: 15 \tTraining Loss: 18.049396 \tValidation Loss: 16.754709\n", + "Validation loss decreased (17.371757 --> 16.754709). Saving model ...\n", + "Epoch: 16 \tTraining Loss: 17.303731 \tValidation Loss: 16.921118\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32md:\\Users\\lucil\\Documents\\S9\\Apprentissage profond\\mod_4_6-td2\\TD2 Deep Learning.ipynb Cell 24\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Users/lucil/Documents/S9/Apprentissage%20profond/mod_4_6-td2/TD2%20Deep%20Learning.ipynb#X56sZmlsZQ%3D%3D?line=25'>26</a>\u001b[0m loss \u001b[39m=\u001b[39m criterion(output, target)\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Users/lucil/Documents/S9/Apprentissage%20profond/mod_4_6-td2/TD2%20Deep%20Learning.ipynb#X56sZmlsZQ%3D%3D?line=26'>27</a>\u001b[0m \u001b[39m# Backward pass: compute gradient of the loss with respect to model parameters\u001b[39;00m\n\u001b[1;32m---> <a href='vscode-notebook-cell:/d%3A/Users/lucil/Documents/S9/Apprentissage%20profond/mod_4_6-td2/TD2%20Deep%20Learning.ipynb#X56sZmlsZQ%3D%3D?line=27'>28</a>\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Users/lucil/Documents/S9/Apprentissage%20profond/mod_4_6-td2/TD2%20Deep%20Learning.ipynb#X56sZmlsZQ%3D%3D?line=28'>29</a>\u001b[0m \u001b[39m# Perform a single optimization step (parameter update)\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/Users/lucil/Documents/S9/Apprentissage%20profond/mod_4_6-td2/TD2%20Deep%20Learning.ipynb#X56sZmlsZQ%3D%3D?line=29'>30</a>\u001b[0m optimizer\u001b[39m.\u001b[39mstep()\n", + "File \u001b[1;32mc:\\Users\\lucil\\anaconda3\\Lib\\site-packages\\torch\\_tensor.py:492\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m 482\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[0;32m 483\u001b[0m \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m 484\u001b[0m Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[0;32m 485\u001b[0m (\u001b[39mself\u001b[39m,),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 490\u001b[0m inputs\u001b[39m=\u001b[39minputs,\n\u001b[0;32m 491\u001b[0m )\n\u001b[1;32m--> 492\u001b[0m torch\u001b[39m.\u001b[39mautograd\u001b[39m.\u001b[39mbackward(\n\u001b[0;32m 493\u001b[0m \u001b[39mself\u001b[39m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39minputs\n\u001b[0;32m 494\u001b[0m )\n", + "File \u001b[1;32mc:\\Users\\lucil\\anaconda3\\Lib\\site-packages\\torch\\autograd\\__init__.py:251\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m 246\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[0;32m 
248\u001b[0m \u001b[39m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 251\u001b[0m Variable\u001b[39m.\u001b[39m_execution_engine\u001b[39m.\u001b[39mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[0;32m 252\u001b[0m tensors,\n\u001b[0;32m 253\u001b[0m grad_tensors_,\n\u001b[0;32m 254\u001b[0m retain_graph,\n\u001b[0;32m 255\u001b[0m create_graph,\n\u001b[0;32m 256\u001b[0m inputs,\n\u001b[0;32m 257\u001b[0m allow_unreachable\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[0;32m 258\u001b[0m accumulate_grad\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[0;32m 259\u001b[0m )\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import torch.optim as optim\n", "\n", "criterion = nn.CrossEntropyLoss()  # specify loss function\n", "optimizer = optim.SGD(model_1.parameters(), lr=0.01)  # specify optimizer\n", "\n", "n_epochs_1 = 30  # number of epochs to train the model\n", "train_loss_list_1 = []  # list to store loss to visualize\n", "valid_loss_min_1 = np.Inf  # track change in validation loss\n", "\n", "for epoch in range(n_epochs_1):\n", "    # Keep track of training and validation loss\n", "    train_loss = 0.0\n", "    valid_loss = 0.0\n", "\n", "    # Train the model\n", "    model_1.train()\n", "    for data, target in train_loader:\n", "        # Move tensors to GPU if CUDA is available\n", "        if train_on_gpu:\n", "            data, target = data.cuda(), target.cuda()\n", "        # Clear the gradients of all optimized variables\n", "        optimizer.zero_grad()\n", "        # Forward pass: compute predicted outputs by passing inputs to the model\n", "        output = model_1(data)\n", "        # Calculate the batch loss\n", "        loss = criterion(output, target)\n", "        # Backward pass: compute gradient of the loss with respect to model parameters\n", "        loss.backward()\n", "        # Perform a single optimization step (parameter update)\n", "        optimizer.step()\n", "        # Update training loss\n", "        train_loss += loss.item() * data.size(0)\n", "\n", "    # Validate the model\n", "    model_1.eval()\n", "    for data, target in valid_loader:\n", "        # Move tensors to GPU if CUDA is available\n", "        if train_on_gpu:\n", "            data, target = data.cuda(), target.cuda()\n", "        # Forward pass: compute predicted outputs by passing inputs to the model\n", "        output = model_1(data)\n", "        # Calculate the batch loss\n", "        loss = criterion(output, target)\n", "        # Update average validation loss\n", "        valid_loss += loss.item() * data.size(0)\n", "\n", "    # Calculate average losses\n", "    train_loss = train_loss / len(train_loader)\n", "    valid_loss = valid_loss / len(valid_loader)\n", "    train_loss_list_1.append(train_loss)\n", "\n", "    # Print training/validation statistics\n", "    print(\n", "        \"Epoch: {} \\tTraining Loss: {:.6f} \\tValidation Loss: {:.6f}\".format(\n", "            epoch, train_loss, valid_loss\n", "        )\n", "    )\n", "\n", "    # Save model if validation loss has decreased\n", "    if valid_loss <= valid_loss_min_1:\n", "        print(\n", "            \"Validation loss decreased ({:.6f} --> {:.6f}). 
Saving model ...\".format(\n", + " valid_loss_min_1, valid_loss\n", + " )\n", + " )\n", + " torch.save(model_1.state_dict(), \"model_cifar.pt\")\n", + " valid_loss_min_1 = valid_loss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparison with the previous model's results" + ] + }, { "cell_type": "markdown", "id": "bc381cf4",