diff --git a/TD2 Deep Learning.ipynb b/TD2 Deep Learning.ipynb index 3bbbe5b51bc8c95a154dac5a194c2993620d7c28..d4132b031284f220f100045a11f5760c653b2845 100644 --- a/TD2 Deep Learning.ipynb +++ b/TD2 Deep Learning.ipynb @@ -1349,7 +1349,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "96729f55", + "id": "a931c3ef", "metadata": {}, "outputs": [ { @@ -1382,7 +1382,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "476ab007", + "id": "0f27e498", "metadata": {}, "outputs": [ { @@ -1414,7 +1414,7 @@ }, { "cell_type": "markdown", - "id": "9c9dd5af", + "id": "fbfa0e80", "metadata": {}, "source": [ "The model perfectly recognizes images of tennis balls and submarines.\n", @@ -1424,7 +1424,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "ff5231c0", + "id": "f51d6c65", "metadata": {}, "outputs": [ { @@ -1454,7 +1454,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "945c82f4", + "id": "b7a7de19", "metadata": {}, "outputs": [ { @@ -1476,7 +1476,7 @@ { "cell_type": "code", "execution_count": 27, - "id": "348ddefc", + "id": "cd81f20d", "metadata": {}, "outputs": [ { @@ -1508,7 +1508,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "93cf9280", + "id": "b41394a7", "metadata": {}, "outputs": [ { @@ -1530,7 +1530,7 @@ }, { "cell_type": "markdown", - "id": "602c1b5b", + "id": "94eb44ab", "metadata": {}, "source": [ "The quantized model recognizes the three images as well as the initial model." ] }, @@ -1929,6 +1929,422 @@ "Apply the quantization (post-training and quantization-aware) and evaluate the impact on model size and accuracy." ] }, + { + "cell_type": "markdown", + "id": "5ff80276", + "metadata": {}, + "source": [ + "## Eval Model" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "1990155d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validation Loss: 0.2484 Accuracy: 0.9333\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.2484304135044416, tensor(0.9333, dtype=torch.float64))" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Just normalization for evaluation on the held-out test set (no augmentation)\n", + "data_transforms = {\n", + " \"test\": transforms.Compose(\n", + " [\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n", + " ]\n", + " ),\n", + "}\n", + "\n", + "image_datasets = {\n", + " x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n", + " for x in [\"test\"]\n", + "}\n", + "\n", + "dataloader = {\n", + " x: torch.utils.data.DataLoader(\n", + " image_datasets[x], batch_size=4, shuffle=True, num_workers=4\n", + " )\n", + " for x in [\"test\"]\n", + "}\n", + "\n", + "dataset_sizes = {x: len(image_datasets[x]) for x in [\"test\"]}\n", + "class_names = image_datasets[\"test\"].classes\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "@torch.no_grad() # no gradients are needed at evaluation time\n", + "def eval_model(model, dataloader, criterion):\n", + " model.eval()\n", + "\n", + " running_loss = 0.0\n", + " running_corrects = 0\n", + "\n", + " for inputs, labels in dataloader[\"test\"]: # use the loader passed as argument, not the global train/val loaders\n", + " inputs = inputs.to(device)\n", + " labels = labels.to(device)\n", + "\n", + " outputs = model(inputs)\n", + " _, preds = torch.max(outputs, 1)\n", + " loss = criterion(outputs, labels)\n", + "\n", + " running_loss += loss.item() * inputs.size(0)\n", + " running_corrects += torch.sum(preds == labels.data)\n",
+ " \n", + " total_loss = running_loss / dataset_sizes[\"test\"]\n", + " total_accuracy = running_corrects.double() / dataset_sizes[\"test\"]\n", + " \n", + " print(\"Validation Loss: {:.4f} Accuracy: {:.4f}\".format(total_loss, total_accuracy)) \n", + " return total_loss, total_accuracy\n", + "\n", + "eval_model(model, dataloader, criterion)" ] }, + { + "cell_type": "markdown", + "id": "8f79cd31", + "metadata": {}, + "source": [ + "Now modify the code to replace the current classification layer with a set of two layers using a \"relu\" activation function for the middle layer, and the \"dropout\" mechanism for both layers. Re-run the experiments and study the results obtained." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "6cbc7079", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "----------\n", + "train Loss: 0.6524 Acc: 0.6025\n", + "val Loss: 0.4151 Acc: 0.9150\n", + "\n", + "Epoch 2/10\n", + "----------\n", + "train Loss: 0.4916 Acc: 0.7869\n", + "val Loss: 0.2853 Acc: 0.9412\n", + "\n", + "Epoch 3/10\n", + "----------\n", + "train Loss: 0.4686 Acc: 0.7705\n", + "val Loss: 0.2572 Acc: 0.9412\n", + "\n", + "Epoch 4/10\n", + "----------\n", + "train Loss: 0.4664 Acc: 0.8115\n", + "val Loss: 0.3574 Acc: 0.8366\n", + "\n", + "Epoch 5/10\n", + "----------\n", + "train Loss: 0.4705 Acc: 0.7664\n", + "val Loss: 0.2105 Acc: 0.9346\n", + "\n", + "Epoch 6/10\n", + "----------\n", + "train Loss: 0.4999 Acc: 0.7500\n", + "val Loss: 0.2255 Acc: 0.9216\n", + "\n", + "Epoch 7/10\n", + "----------\n", + "train Loss: 0.3877 Acc: 0.8402\n", + "val Loss: 0.2053 Acc: 0.9542\n", + "\n", + "Epoch 8/10\n", + "----------\n", + "train Loss: 0.4351 Acc: 0.7951\n", + "val Loss: 0.2105 Acc: 0.9346\n", + "\n", + "Epoch 9/10\n", + "----------\n", + "train Loss: 0.3654 Acc: 0.8402\n", + "val Loss: 0.2073 Acc: 0.9346\n", + "\n", + "Epoch 10/10\n", + "----------\n", + "train Loss: 0.4383 Acc: 0.8074\n", + "val Loss: 0.2045 Acc: 0.9542\n", + "\n", + "Training complete in 2m 38s\n", + "Best val Acc: 0.954248\n" + ] + } + ], + "source": [ + "# Data augmentation and normalization for training\n", + "# Just normalization for validation\n", + "data_transforms = {\n", + " \"train\": transforms.Compose(\n", + " [\n", + " transforms.RandomResizedCrop(\n", + " 224\n", + " ), # ImageNet models were trained on 224x224 images\n", + " transforms.RandomHorizontalFlip(), # flip horizontally 50% of the time - increases train set variability\n", + " transforms.ToTensor(), # convert it to a PyTorch tensor\n", + " transforms.Normalize(\n", + " [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]\n", + " ), # ImageNet models expect this norm\n", + " ]\n", + " ),\n", + " \"val\": transforms.Compose(\n", + " [\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n", + " ]\n", + " ),\n", + "}\n", + "\n", + "data_dir = \"hymenoptera_data\"\n", + "# Create train and validation datasets and loaders\n", + "image_datasets = {\n", + " x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])\n", + " for x in [\"train\", \"val\"]\n", + "}\n", + "dataloaders = {\n", + " x: torch.utils.data.DataLoader(\n", + " image_datasets[x], batch_size=4, shuffle=True, num_workers=4\n", + " )\n", + " for x in [\"train\", \"val\"]\n", + "}\n", + "dataset_sizes = {x: len(image_datasets[x]) for x in [\"train\", \"val\"]}\n",
"class_names = image_datasets[\"train\"].classes\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "# Helper function for displaying images\n", + "def imshow(inp, title=None):\n", + " \"\"\"Imshow for Tensor.\"\"\"\n", + " inp = inp.numpy().transpose((1, 2, 0))\n", + " mean = np.array([0.485, 0.456, 0.406])\n", + " std = np.array([0.229, 0.224, 0.225])\n", + "\n", + " # Un-normalize the images\n", + " inp = std * inp + mean\n", + " # Clip just in case\n", + " inp = np.clip(inp, 0, 1)\n", + " plt.imshow(inp)\n", + " if title is not None:\n", + " plt.title(title)\n", + " plt.pause(0.001) # pause a bit so that plots are updated\n", + " plt.show()\n", + "\n", + "\n", + "# Get a batch of training data\n", + "# inputs, classes = next(iter(dataloaders['train']))\n", + "\n", + "# Make a grid from batch\n", + "# out = torchvision.utils.make_grid(inputs)\n", + "\n", + "# imshow(out, title=[class_names[x] for x in classes])\n", + "# training\n", + "\n", + "\n", + "def train_model(model, criterion, optimizer, scheduler, num_epochs=25):\n", + " since = time.time()\n", + "\n", + " best_model_wts = copy.deepcopy(model.state_dict())\n", + " best_acc = 0.0\n", + "\n", + " epoch_time = [] # we'll keep track of the time needed for each epoch\n", + "\n", + " for epoch in range(num_epochs):\n", + " epoch_start = time.time()\n", + " print(\"Epoch {}/{}\".format(epoch + 1, num_epochs))\n", + " print(\"-\" * 10)\n", + "\n", + " # Each epoch has a training and validation phase\n", + " for phase in [\"train\", \"val\"]:\n", + " if phase == \"train\":\n", + " scheduler.step()\n", + " model.train() # Set model to training mode\n", + " else:\n", + " model.eval() # Set model to evaluate mode\n", + "\n", + " running_loss = 0.0\n", + " running_corrects = 0\n", + "\n", + " # Iterate over data.\n", + " for inputs, labels in dataloaders[phase]:\n", + " inputs = inputs.to(device)\n", + " labels = labels.to(device)\n", + "\n", + " # zero the parameter gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # Forward\n", + " # Track history if only in training phase\n", + " with torch.set_grad_enabled(phase == \"train\"):\n", + " outputs = model(inputs)\n", + " _, preds = torch.max(outputs, 1)\n", + " loss = criterion(outputs, labels)\n", + "\n", + " # backward + optimize only if in training phase\n", + " if phase == \"train\":\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " # Statistics\n", + " running_loss += loss.item() * inputs.size(0)\n", + " running_corrects += torch.sum(preds == labels.data)\n", + "\n", + " epoch_loss = running_loss / dataset_sizes[phase]\n", + " epoch_acc = running_corrects.double() / dataset_sizes[phase]\n", + "\n", + " print(\"{} Loss: {:.4f} Acc: {:.4f}\".format(phase, epoch_loss, epoch_acc))\n", + "\n", + " # Deep copy the model\n", + " if phase == \"val\" and epoch_acc > best_acc:\n", + " best_acc = epoch_acc\n", + " best_model_wts = copy.deepcopy(model.state_dict())\n", + "\n", + " # Add the epoch time\n", + " t_epoch = time.time() - epoch_start\n", + " epoch_time.append(t_epoch)\n", + " print()\n", + "\n", + " time_elapsed = time.time() - since\n", + " print(\n", + " \"Training complete in {:.0f}m {:.0f}s\".format(\n", + " time_elapsed // 60, time_elapsed % 60\n", + " )\n", + " )\n", + " print(\"Best val Acc: {:4f}\".format(best_acc))\n", + "\n", + " # Load best model weights\n", + " model.load_state_dict(best_model_wts)\n", + " return model, epoch_time\n", + "\n", + "\n", + "# Download a pre-trained ResNet18 model and freeze its 
weights\n", + "model = torchvision.models.resnet18(pretrained=True)\n", + "for param in model.parameters():\n", + " param.requires_grad = False\n", + "\n", + "# Replace the final fully connected layer\n", + "# Parameters of newly constructed modules have requires_grad=True by default\n", + "num_ftrs = model.fc.in_features\n", + "\n", + "model.fc = nn.Sequential(\n", + " nn.Linear(num_ftrs, 256),\n", + " nn.ReLU(),\n", + " nn.Dropout(0.5),\n", + " nn.Linear(256, 2),\n", + ")\n", + "\n", + "\n", + "# Send the model to the GPU\n", + "model = model.to(device)\n", + "# Set the loss function\n", + "criterion = nn.CrossEntropyLoss()\n", + "\n", + "# Observe that only the parameters of the final layer are being optimized\n", + "optimizer_conv = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)\n", + "exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)\n", + "model, epoch_time = train_model(\n", + " model, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=10\n", + ")\n", + "\n", + "torch.save(model.state_dict(), \"model_exo4_relu.pt\")" + ] + }, + { + "cell_type": "markdown", + "id": "4e3ad3b6", + "metadata": {}, + "source": [ + "## Quantized model" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "c15b3052", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model: int8 \t Size (KB): 45303.823\n", + "model: int8 \t Size (KB): 44910.523\n" + ] + }, + { + "data": { + "text/plain": [ + "44910523" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print_size_of_model(model, \"int8\")\n", + "\n", + "quantized_model = torch.quantization.quantize_dynamic(model, dtype=torch.qint8)\n", + "torch.save(quantized_model.state_dict(), \"model_cifar_exo4_relu_quantized.pt\")\n", + "print_size_of_model(quantized_model, \"int8\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "c3b0a07a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validation Loss: 0.2492 Accuracy: 0.9333\n" + ] + }, + { + "data": { + "text/plain": [ + "(0.24922307034333546, tensor(0.9333, dtype=torch.float64))" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eval_model(quantized_model, dataloader, criterion)" + ] + }, + { + "cell_type": "markdown", + "id": "03e3450b", + "metadata": {}, + "source": [ + "The quantized model is as good as the initial model however the size of the model after the quantization process has only be reduced by 1% which is useless." + ] + }, { "cell_type": "markdown", "id": "04a263f0",