diff --git a/be2_gan.ipynb b/be2_gan.ipynb
index c764ded6d6517cbb76ff6153980b71b8df86743e..56e6f1c49fc20aeb54c3710ac504169c23db6fee 100644
--- a/be2_gan.ipynb
+++ b/be2_gan.ipynb
@@ -35424,7 +35424,6 @@
     "image, mask = next(iter(val_dataloader))\n",
     "output = generator(mask.to(device))\n",
     "output = output.cpu().detach()\n",
-    "\n",
     "for i in range(8):\n",
     "    image_plot = reverse_transform(image[i])\n",
     "    output_plot = reverse_transform(output[i])\n",
@@ -35498,9 +35497,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# TODO: change the variable name to match the one you used in part 1 or reload the dataset\n",
-    "mnist_dataset = \n",
-    "mnist_dataloader ="
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "\n",
+    "batch_size = 128\n",
+    "image_size = 64\n",
+    "\n",
+    "# Resize MNIST to 64x64 and normalize to [-1, 1]\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize(image_size),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.5,), (0.5,))\n",
+    "])\n",
+    "\n",
+    "mnist_dataset = datasets.MNIST(\n",
+    "    root=\"./data\", train=True, download=True, transform=transform\n",
+    ")\n",
+    "mnist_dataloader = DataLoader(mnist_dataset, batch_size=batch_size, shuffle=True)\n"
    ]
   },
   {
@@ -35518,7 +35532,7 @@
    "source": [
     "def plot1xNArray(images, labels):\n",
     "    f, axarr = plt.subplots(1, len(images))\n",
-    "    \n",
+    "\n",
     "    for image, ax, label in zip(images, axarr, labels):\n",
     "        ax.imshow(image, cmap='gray')\n",
     "        ax.axis('off')\n",
@@ -35542,25 +35556,36 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import torch\n",
     "from diffusers import DDPMScheduler\n",
     "\n",
-    "# TODO: Create the scheduler\n",
-    "noise_scheduler = \n",
+    "# 1. Noise scheduler\n",
+    "noise_scheduler = DDPMScheduler(num_train_timesteps=1000)\n",
     "\n",
+    "# 2. Load one image\n",
     "image, _ = mnist_dataset[0]\n",
+    "image = image.unsqueeze(0).to(torch.float32)  # (1, 1, 64, 64)\n",
+    "\n",
+    "# 3. Random noise with the same shape as the image\n",
+    "noise = torch.randn_like(image)\n",
     "\n",
-    "# TODO: Create a noise tensor sampled from a normal distribution with the same shape as the image\n",
-    "noise = \n",
+    "# 4. Reverse transform for display: map [-1, 1] back to [0, 1]\n",
+    "def reverse_transform(tensor):\n",
+    "    return (tensor.clamp(-1, 1) + 1) / 2\n",
     "\n",
-    "images, labels = [reverse_transform(image)], [\"Original\"]\n",
+    "# 5. Visualization at increasing noise levels\n",
+    "images = [reverse_transform(image[0]).squeeze(0)]  # (64, 64)\n",
+    "labels = [\"Original\"]\n",
     "\n",
     "for i in [100, 250, 400, 900]:\n",
-    "    timestep = torch.LongTensor([i])\n",
+    "    timestep = torch.tensor([i], dtype=torch.long)\n",
     "    noisy_image = noise_scheduler.add_noise(image, noise, timestep)\n",
-    "    images.append(reverse_transform(noisy_image))\n",
+    "    noisy_image = reverse_transform(noisy_image[0]).squeeze(0)  # (64, 64)\n",
+    "    images.append(noisy_image)\n",
     "    labels.append(f\"t={i}\")\n",
     "\n",
-    "plot1xNArray(images, labels)"
+    "# 6. Plot\n",
Affichage\n", + "plot1xNArray(images, labels)\n" ] }, { @@ -35582,20 +35607,22 @@ "source": [ "from diffusers import UNet2DModel\n", "\n", - "# TODO: Complete the parameters\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", "diffusion_backbone = UNet2DModel(\n", - " block_out_channels=(64, 128, 256, 512),\n", - " down_block_types=(\"DownBlock2D\", \"DownBlock2D\", \"DownBlock2D\", \"DownBlock2D\"),\n", - " up_block_types=(\"UpBlock2D\", \"UpBlock2D\", \"UpBlock2D\", \"UpBlock2D\"),\n", - " sample_size=,\n", - " in_channels=,\n", - " out_channels=,\n", - " ).to(device)\n", - " \n", + " sample_size=64, # image 64x64\n", + " in_channels=1, # MNIST grayscale\n", + " out_channels=1, # prédiction du bruit (même taille que l'entrée)\n", + " block_out_channels=(64, 128, 256, 512),\n", + " down_block_types=(\"DownBlock2D\", \"DownBlock2D\", \"DownBlock2D\", \"DownBlock2D\"),\n", + " up_block_types=(\"UpBlock2D\", \"UpBlock2D\", \"UpBlock2D\", \"UpBlock2D\"),\n", + ").to(device)\n", + "\n", "# Optimizer\n", "optimizer = torch.optim.AdamW(diffusion_backbone.parameters(), lr=1e-4)\n", "\n", - "print(diffusion_backbone)" + "# Affichage du modèle\n", + "print(diffusion_backbone)\n" ] }, { @@ -35651,15 +35678,15 @@ " timesteps = torch.randint(noise_scheduler.config.num_train_timesteps, (images.size(0),), device=device)\n", "\n", " # TODO: Complete the code\n", - " noise = \n", - " noisy_images = \n", + " noise =\n", + " noisy_images =\n", "\n", " # Forward pass\n", " residual = diffusion_backbone(noisy_images, timesteps).sample\n", - " \n", + "\n", " # TODO: Compute the loss\n", - " loss = \n", - " \n", + " loss =\n", + "\n", " loss.backward()\n", " optimizer.step()\n", "\n",