diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb
index 48543b667566028121d970d5b10f580f839f0ff7..2194e766254ad2dca2af828099b149926d889bf2 100644
--- a/TD2 Deep Learning.ipynb
+++ b/TD2 Deep Learning.ipynb
@@ -2145,6 +2145,365 @@
    "Apply the quantization (post-training and quantization-aware) and evaluate the impact on model size and accuracy."
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# Transfer learning: fine-tune the last layer of a pre-trained ResNet18\n",
+   "import copy\n",
+   "import os\n",
+   "import time\n",
+   "\n",
+   "import matplotlib.pyplot as plt\n",
+   "import numpy as np\n",
+   "import torch\n",
+   "import torch.nn as nn\n",
+   "import torch.optim as optim\n",
+   "import torchvision\n",
+   "from torch.optim import lr_scheduler\n",
+   "from torchvision import datasets, transforms\n",
+   "\n",
+   "# Data augmentation and normalization for training;\n",
+   "# just normalization for validation and test\n",
+   "data_transforms = {\n",
+   "    \"train\": transforms.Compose(\n",
+   "        [\n",
+   "            transforms.RandomResizedCrop(224),\n",
+   "            transforms.RandomHorizontalFlip(),\n",
+   "            transforms.ToTensor(),\n",
+   "            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+   "        ]\n",
+   "    ),\n",
+   "    \"val\": transforms.Compose(\n",
+   "        [\n",
+   "            transforms.Resize(256),\n",
+   "            transforms.CenterCrop(224),\n",
+   "            transforms.ToTensor(),\n",
+   "            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+   "        ]\n",
+   "    ),\n",
+   "    \"test\": transforms.Compose(\n",
+   "        [\n",
+   "            transforms.Resize(256),\n",
+   "            transforms.CenterCrop(224),\n",
+   "            transforms.ToTensor(),\n",
+   "            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n",
+   "        ]\n",
+   "    ),\n",
+   "}\n",
+   "\n",
+   "data_dir = \"hymenoptera_data\"\n",
+   "# Create train, validation and test datasets and loaders\n",
+   "image_datasets = {\n",
+   "    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n",
+   "    for x in [\"train\", \"val\", \"test\"]\n",
+   "}\n",
+   "dataloaders = {\n",
+   "    x: torch.utils.data.DataLoader(\n",
+   "        image_datasets[x], batch_size=4, shuffle=True, num_workers=4\n",
+   "    )\n",
+   "    for x in [\"train\", \"val\", \"test\"]\n",
+   "}\n",
+   "dataset_sizes = {x: len(image_datasets[x]) for x in [\"train\", \"val\", \"test\"]}\n",
+   "class_names = image_datasets[\"train\"].classes\n",
+   "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
+   "\n",
+   "\n",
+   "# Helper function for displaying images\n",
+   "def imshow(inp, title=None):\n",
+   "    \"\"\"Imshow for Tensor.\"\"\"\n",
+   "    inp = inp.numpy().transpose((1, 2, 0))\n",
+   "    mean = np.array([0.485, 0.456, 0.406])\n",
+   "    std = np.array([0.229, 0.224, 0.225])\n",
+   "    # Un-normalize the images\n",
+   "    inp = std * inp + mean\n",
+   "    # Clip just in case\n",
+   "    inp = np.clip(inp, 0, 1)\n",
+   "    plt.imshow(inp)\n",
+   "    if title is not None:\n",
+   "        plt.title(title)\n",
+   "    plt.pause(0.001)  # pause a bit so that plots are updated\n",
+   "    plt.show()\n",
+   "\n",
+   "\n",
+   "# Get a batch of training data and display it (uncomment to check the data)\n",
+   "# inputs, classes = next(iter(dataloaders[\"train\"]))\n",
+   "# out = torchvision.utils.make_grid(inputs)\n",
+   "# imshow(out, title=[class_names[x] for x in classes])\n",
+   "\n",
+   "\n",
+   "def train_model(model, criterion, optimizer, scheduler, num_epochs=25):\n",
+   "    since = time.time()\n",
+   "\n",
+   "    best_model_wts = copy.deepcopy(model.state_dict())\n",
+   "    best_acc = 0.0\n",
+   "\n",
+   "    epoch_time = []  # keep track of the time needed for each epoch\n",
+   "\n",
+   "    for epoch in range(num_epochs):\n",
+   "        epoch_start = time.time()\n",
+   "        print(\"Epoch {}/{}\".format(epoch + 1, num_epochs))\n",
+   "        print(\"-\" * 10)\n",
+   "\n",
+   "        # Each epoch has a training and a validation phase\n",
+   "        for phase in [\"train\", \"val\"]:\n",
+   "            if phase == \"train\":\n",
+   "                model.train()  # Set model to training mode\n",
+   "            else:\n",
+   "                model.eval()  # Set model to evaluate mode\n",
+   "\n",
+   "            running_loss = 0.0\n",
+   "            running_corrects = 0\n",
+   "\n",
+   "            # Iterate over data\n",
+   "            for inputs, labels in dataloaders[phase]:\n",
+   "                inputs = inputs.to(device)\n",
+   "                labels = labels.to(device)\n",
+   "\n",
+   "                # Zero the parameter gradients\n",
+   "                optimizer.zero_grad()\n",
+   "\n",
+   "                # Forward pass; track history only in the training phase\n",
+   "                with torch.set_grad_enabled(phase == \"train\"):\n",
+   "                    outputs = model(inputs)\n",
+   "                    _, preds = torch.max(outputs, 1)\n",
+   "                    loss = criterion(outputs, labels)\n",
+   "\n",
+   "                    # Backward pass + optimize only in the training phase\n",
+   "                    if phase == \"train\":\n",
+   "                        loss.backward()\n",
+   "                        optimizer.step()\n",
+   "\n",
+   "                # Statistics\n",
+   "                running_loss += loss.item() * inputs.size(0)\n",
+   "                running_corrects += torch.sum(preds == labels.data)\n",
+   "\n",
+   "            # Step the scheduler once per epoch, after the optimizer updates\n",
+   "            if phase == \"train\":\n",
+   "                scheduler.step()\n",
+   "\n",
+   "            epoch_loss = running_loss / dataset_sizes[phase]\n",
+   "            epoch_acc = running_corrects.double() / dataset_sizes[phase]\n",
+   "\n",
+   "            print(\"{} Loss: {:.4f} Acc: {:.4f}\".format(phase, epoch_loss, epoch_acc))\n",
+   "\n",
+   "            # Deep copy the best model weights so far\n",
+   "            if phase == \"val\" and epoch_acc > best_acc:\n",
+   "                best_acc = epoch_acc\n",
+   "                best_model_wts = copy.deepcopy(model.state_dict())\n",
+   "\n",
+   "        # Record the epoch time\n",
+   "        t_epoch = time.time() - epoch_start\n",
+   "        epoch_time.append(t_epoch)\n",
+   "        print()\n",
+   "\n",
+   "    time_elapsed = time.time() - since\n",
+   "    print(\n",
+   "        \"Training complete in {:.0f}m {:.0f}s\".format(\n",
+   "            time_elapsed // 60, time_elapsed % 60\n",
+   "        )\n",
+   "    )\n",
+   "    print(\"Best val Acc: {:.4f}\".format(best_acc))\n",
+   "\n",
+   "    # Load the best model weights\n",
+   "    model.load_state_dict(best_model_wts)\n",
+   "\n",
+   "    return model, epoch_time\n",
+   "\n",
+   "\n",
+   "# Download a pre-trained ResNet18 model and freeze its weights\n",
+   "# (weights= replaces the deprecated pretrained=True argument)\n",
+   "model = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)\n",
+   "for param in model.parameters():\n",
+   "    param.requires_grad = False\n",
+   "\n",
+   "# Replace the final fully connected layer\n",
+   "# Parameters of newly constructed modules have requires_grad=True by default\n",
+   "num_ftrs = model.fc.in_features\n",
+   "model.fc = nn.Linear(num_ftrs, 2)\n",
+   "# Send the model to the GPU if available\n",
+   "model = model.to(device)\n",
+   "# Set the loss function\n",
+   "criterion = nn.CrossEntropyLoss()\n",
+   "\n",
+   "# Only the parameters of the final layer are being optimized\n",
+   "optimizer_conv = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)\n",
+   "exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)\n",
+   "model, epoch_time = train_model(\n",
+   "    model, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=10\n",
+   ")"
+  ]
+ },
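+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Before quantizing anything, record the size of the floating-point model as a baseline. The `print_size_of_model` helper below is a minimal sketch (serialize the state dict to a temporary file and report its size on disk); if the notebook already defines this helper in an earlier section, this cell can be skipped."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "import os\n",
+   "\n",
+   "\n",
+   "def print_size_of_model(model, label=\"\"):\n",
+   "    \"\"\"Save the state dict to a temporary file and print its size in KB.\"\"\"\n",
+   "    torch.save(model.state_dict(), \"temp.p\")\n",
+   "    size = os.path.getsize(\"temp.p\")\n",
+   "    print(\"model:\", label, \" Size (KB):\", size / 1e3)\n",
+   "    os.remove(\"temp.p\")\n",
+   "    return size\n",
+   "\n",
+   "\n",
+   "# Baseline: size of the fine-tuned fp32 model\n",
+   "print_size_of_model(model, \"fp32\")"
+  ]
+ },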
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# Evaluate the model: track test loss and per-class accuracy on the test set\n",
+   "num_classes = len(class_names)\n",
+   "test_loss = 0.0\n",
+   "class_correct = [0.0] * num_classes\n",
+   "class_total = [0.0] * num_classes\n",
+   "\n",
+   "model.eval()\n",
+   "# Iterate over the test data in batches\n",
+   "with torch.no_grad():\n",
+   "    for data, target in dataloaders[\"test\"]:\n",
+   "        # Move tensors to the GPU if CUDA is available\n",
+   "        data, target = data.to(device), target.to(device)\n",
+   "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+   "        output = model(data)\n",
+   "        # Calculate the batch loss\n",
+   "        loss = criterion(output, target)\n",
+   "        # Update test loss\n",
+   "        test_loss += loss.item() * data.size(0)\n",
+   "        # Convert output scores to predicted class\n",
+   "        _, pred = torch.max(output, 1)\n",
+   "        # Compare predictions to the true labels\n",
+   "        correct = pred.eq(target).cpu().numpy()\n",
+   "        # Accumulate per-class statistics over the actual batch size\n",
+   "        # (the last batch may be smaller than batch_size)\n",
+   "        for i in range(target.size(0)):\n",
+   "            label = target[i].item()\n",
+   "            class_correct[label] += correct[i].item()\n",
+   "            class_total[label] += 1\n",
+   "\n",
+   "# Average test loss over the number of test images\n",
+   "test_loss = test_loss / dataset_sizes[\"test\"]\n",
+   "print(\"Test Loss: {:.6f}\\n\".format(test_loss))\n",
+   "\n",
+   "for i in range(num_classes):\n",
+   "    if class_total[i] > 0:\n",
+   "        print(\n",
+   "            \"Test Accuracy of %5s: %2d%% (%2d/%2d)\"\n",
+   "            % (\n",
+   "                class_names[i],\n",
+   "                100 * class_correct[i] / class_total[i],\n",
+   "                class_correct[i],\n",
+   "                class_total[i],\n",
+   "            )\n",
+   "        )\n",
+   "    else:\n",
+   "        print(\"Test Accuracy of %5s: N/A (no test examples)\" % (class_names[i]))\n",
+   "\n",
+   "print(\n",
+   "    \"\\nTest Accuracy (Overall): %2d%% (%2d/%2d)\"\n",
+   "    % (\n",
+   "        100.0 * np.sum(class_correct) / np.sum(class_total),\n",
+   "        np.sum(class_correct),\n",
+   "        np.sum(class_total),\n",
+   "    )\n",
+   ")"
+  ]
+ },
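+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "To compare accuracy before and after quantization without repeating the loop above, the test pass can be factored into a small helper. `evaluate_accuracy` is a name introduced here for convenience, not part of the original notebook; note that quantized models must be evaluated on the CPU."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "def evaluate_accuracy(net, loader, eval_device=torch.device(\"cpu\")):\n",
+   "    \"\"\"Return the overall classification accuracy of `net` on `loader`.\"\"\"\n",
+   "    net = net.to(eval_device)\n",
+   "    net.eval()\n",
+   "    correct, total = 0, 0\n",
+   "    with torch.no_grad():\n",
+   "        for data, target in loader:\n",
+   "            data, target = data.to(eval_device), target.to(eval_device)\n",
+   "            _, pred = torch.max(net(data), 1)\n",
+   "            correct += (pred == target).sum().item()\n",
+   "            total += target.size(0)\n",
+   "    return correct / total"
+  ]
+ },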
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "import torch.quantization\n",
+   "\n",
+   "# Post-training dynamic quantization: weights of the linear layers are\n",
+   "# converted to int8 and activations are quantized on the fly at inference.\n",
+   "# Dynamic quantization runs on the CPU, so quantize a CPU copy of the model.\n",
+   "cpu_model = copy.deepcopy(model).to(\"cpu\")\n",
+   "quantized_model = torch.quantization.quantize_dynamic(\n",
+   "    cpu_model, {nn.Linear}, dtype=torch.qint8\n",
+   ")\n",
+   "print_size_of_model(cpu_model, \"fp32\")\n",
+   "print_size_of_model(quantized_model, \"int8\")"
+  ]
+ },
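+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "With the `evaluate_accuracy` helper defined above, the accuracy impact of post-training dynamic quantization can be checked directly. Since only the final `nn.Linear` layer is dynamically quantized here, the accuracy is expected to stay close to the fp32 baseline."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# The dynamically quantized model runs on the CPU\n",
+   "acc_fp32 = evaluate_accuracy(model, dataloaders[\"test\"], device)\n",
+   "acc_int8 = evaluate_accuracy(quantized_model, dataloaders[\"test\"])\n",
+   "print(\"fp32 test accuracy: {:.4f}\".format(acc_fp32))\n",
+   "print(\"int8 (dynamic) test accuracy: {:.4f}\".format(acc_int8))"
+  ]
+ },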
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# Quantization-aware training (QAT)\n",
+   "# Work on a CPU copy: the fbgemm backend and the final conversion are CPU-only\n",
+   "my_model = copy.deepcopy(model).to(\"cpu\")\n",
+   "my_model.train()  # prepare_qat expects the model in training mode\n",
+   "my_model.qconfig = torch.ao.quantization.get_default_qat_qconfig(\"fbgemm\")\n",
+   "my_model = torch.ao.quantization.prepare_qat(my_model)\n",
+   "\n",
+   "criterion = nn.CrossEntropyLoss()  # specify loss function\n",
+   "# Only the final layer is trainable (the backbone was frozen above)\n",
+   "optimizer = optim.SGD(\n",
+   "    filter(lambda p: p.requires_grad, my_model.parameters()), lr=0.01\n",
+   ")\n",
+   "\n",
+   "n_epochs = 30  # number of epochs to train the model\n",
+   "train_loss_list = []  # list to store losses to visualize\n",
+   "valid_loss_list = []\n",
+   "valid_loss_min = np.inf  # track change in validation loss\n",
+   "\n",
+   "for epoch in range(n_epochs):\n",
+   "    # Keep track of training and validation loss\n",
+   "    train_loss = 0.0\n",
+   "    valid_loss = 0.0\n",
+   "\n",
+   "    # Train the model (the fake-quantization observers are active)\n",
+   "    my_model.train()\n",
+   "    for data, target in dataloaders[\"train\"]:\n",
+   "        # Clear the gradients of all optimized variables\n",
+   "        optimizer.zero_grad()\n",
+   "        # Forward pass: compute predicted outputs by passing inputs to the model\n",
+   "        output = my_model(data)\n",
+   "        # Calculate the batch loss\n",
+   "        loss = criterion(output, target)\n",
+   "        # Backward pass: compute gradient of the loss with respect to model parameters\n",
+   "        loss.backward()\n",
+   "        # Perform a single optimization step (parameter update)\n",
+   "        optimizer.step()\n",
+   "        # Update training loss\n",
+   "        train_loss += loss.item() * data.size(0)\n",
+   "\n",
+   "    # Validate the model\n",
+   "    my_model.eval()\n",
+   "    with torch.no_grad():\n",
+   "        for data, target in dataloaders[\"val\"]:\n",
+   "            # Forward pass: compute predicted outputs by passing inputs to the model\n",
+   "            output = my_model(data)\n",
+   "            # Calculate the batch loss\n",
+   "            loss = criterion(output, target)\n",
+   "            # Update validation loss\n",
+   "            valid_loss += loss.item() * data.size(0)\n",
+   "\n",
+   "    # Average the losses over the number of images\n",
+   "    train_loss = train_loss / dataset_sizes[\"train\"]\n",
+   "    valid_loss = valid_loss / dataset_sizes[\"val\"]\n",
+   "    train_loss_list.append(train_loss)\n",
+   "    valid_loss_list.append(valid_loss)\n",
+   "\n",
+   "    # Print training/validation statistics\n",
+   "    print(\n",
+   "        \"Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}\".format(\n",
+   "            epoch, train_loss, valid_loss\n",
+   "        )\n",
+   "    )\n",
+   "\n",
+   "    # Save the model if the validation loss has decreased\n",
+   "    if valid_loss <= valid_loss_min:\n",
+   "        print(\n",
+   "            \"Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...\".format(\n",
+   "                valid_loss_min, valid_loss\n",
+   "            )\n",
+   "        )\n",
+   "        torch.save(my_model.state_dict(), \"my_model_qat.pt\")\n",
+   "        valid_loss_min = valid_loss\n",
+   "\n",
+   "# Convert only AFTER training: the fake-quantized modules become real int8 modules\n",
+   "# (running the converted model would also require QuantStub/DeQuantStub wrappers)\n",
+   "qat_quantized_model = torch.quantization.convert(my_model.eval(), inplace=False)\n",
+   "print_size_of_model(qat_quantized_model, \"int8 (QAT)\")"
+  ]
+ },
 {
  "cell_type": "markdown",
  "id": "04a263f0",