diff --git a/main.ipynb b/main.ipynb
index 72d5d962f2be70c0a1acfd68913a545a2b823724..1a9357a783ae569558942a92e323bfeb2a078f29 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 165,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 166,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 167,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 168,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -70,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 169,
    "metadata": {},
    "outputs": [
     {
@@ -100,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 170,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -121,7 +121,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 171,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -410,7 +410,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 187,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -437,7 +437,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 188,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -458,14 +458,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 189,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loss: 0.07218991126027921\n"
+      "Loss: 0.09592802009114174\n"
      ]
     }
    ],
@@ -492,7 +492,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 190,
    "metadata": {},
    "outputs": [
     {
@@ -503,7 +503,7 @@
        "       [1., 0., 0.]])"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 190,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -521,7 +521,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 191,
    "metadata": {},
    "outputs": [
     {
@@ -552,14 +552,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 192,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loss: 0.7075029802848043\n"
+      "Loss: 0.7102590810256381\n"
      ]
     }
    ],
@@ -570,7 +570,7 @@
     "    w2,\n",
     "    b2,\n",
     "    random_data,\n",
-    "    random_targets,\n",
+    "    one_hot(random_targets),\n",
     "    0.1,\n",
     ")\n",
     "\n",
@@ -586,7 +586,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 193,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -597,139 +597,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 194,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize data\n",
+    "norm_train_data_09 = train_data_09 / 255\n",
+    "norm_test_data_09 = test_data_09 / 255"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 195,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:221: RuntimeWarning: overflow encountered in exp\n",
-      "  z2 = np.matmul(a1, w2) + b2\n",
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:118: RuntimeWarning: overflow encountered in exp\n",
-      "  z2 = np.matmul(a1, w2) + b2\n",
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:16: RuntimeWarning: invalid value encountered in divide\n",
-      "  \n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 0: 0.10592592592592592\n",
-      "Epoch 1: 0.09855555555555555\n",
-      "Epoch 2: 0.10437037037037038\n",
-      "Epoch 3: 0.109\n",
-      "Epoch 4: 0.10901851851851851\n",
-      "Epoch 5: 0.10907407407407407\n",
-      "Epoch 6: 0.10044444444444445\n",
-      "Epoch 7: 0.0995925925925926\n",
-      "Epoch 8: 0.09985185185185186\n",
-      "Epoch 9: 0.09985185185185186\n",
-      "Epoch 10: 0.09985185185185186\n",
-      "Epoch 11: 0.09985185185185186\n",
-      "Epoch 12: 0.09985185185185186\n",
-      "Epoch 13: 0.09985185185185186\n",
-      "Epoch 14: 0.09985185185185186\n",
-      "Epoch 15: 0.09985185185185186\n",
-      "Epoch 16: 0.09985185185185186\n",
-      "Epoch 17: 0.09985185185185186\n",
-      "Epoch 18: 0.09985185185185186\n",
-      "Epoch 19: 0.09985185185185186\n",
-      "Epoch 20: 0.09985185185185186\n",
-      "Epoch 21: 0.09985185185185186\n",
-      "Epoch 22: 0.09985185185185186\n",
-      "Epoch 23: 0.09985185185185186\n",
-      "Epoch 24: 0.09985185185185186\n",
-      "Epoch 25: 0.09985185185185186\n",
-      "Epoch 26: 0.09985185185185186\n",
-      "Epoch 27: 0.09985185185185186\n",
-      "Epoch 28: 0.09985185185185186\n",
-      "Epoch 29: 0.09985185185185186\n",
-      "Epoch 30: 0.09985185185185186\n",
-      "Epoch 31: 0.09985185185185186\n",
-      "Epoch 32: 0.09985185185185186\n",
-      "Epoch 33: 0.09985185185185186\n",
-      "Epoch 34: 0.09985185185185186\n",
-      "Epoch 35: 0.09985185185185186\n",
-      "Epoch 36: 0.09985185185185186\n",
-      "Epoch 37: 0.09985185185185186\n",
-      "Epoch 38: 0.09985185185185186\n",
-      "Epoch 39: 0.09985185185185186\n",
-      "Epoch 40: 0.09985185185185186\n",
-      "Epoch 41: 0.09985185185185186\n",
-      "Epoch 42: 0.09985185185185186\n",
-      "Epoch 43: 0.09985185185185186\n",
-      "Epoch 44: 0.09985185185185186\n",
-      "Epoch 45: 0.09985185185185186\n",
-      "Epoch 46: 0.09985185185185186\n",
-      "Epoch 47: 0.09985185185185186\n",
-      "Epoch 48: 0.09985185185185186\n",
-      "Epoch 49: 0.09985185185185186\n",
-      "Epoch 50: 0.09985185185185186\n",
-      "Epoch 51: 0.09985185185185186\n",
-      "Epoch 52: 0.09985185185185186\n",
-      "Epoch 53: 0.09985185185185186\n",
-      "Epoch 54: 0.09985185185185186\n",
-      "Epoch 55: 0.09985185185185186\n",
-      "Epoch 56: 0.09985185185185186\n",
-      "Epoch 57: 0.09985185185185186\n",
-      "Epoch 58: 0.09985185185185186\n",
-      "Epoch 59: 0.09985185185185186\n",
-      "Epoch 60: 0.09985185185185186\n",
-      "Epoch 61: 0.09985185185185186\n",
-      "Epoch 62: 0.09985185185185186\n",
-      "Epoch 63: 0.09985185185185186\n",
-      "Epoch 64: 0.09985185185185186\n",
-      "Epoch 65: 0.09985185185185186\n",
-      "Epoch 66: 0.09985185185185186\n",
-      "Epoch 67: 0.09985185185185186\n",
-      "Epoch 68: 0.09985185185185186\n",
-      "Epoch 69: 0.09985185185185186\n",
-      "Epoch 70: 0.09985185185185186\n",
-      "Epoch 71: 0.09985185185185186\n",
-      "Epoch 72: 0.09985185185185186\n",
-      "Epoch 73: 0.09985185185185186\n",
-      "Epoch 74: 0.09985185185185186\n",
-      "Epoch 75: 0.09985185185185186\n",
-      "Epoch 76: 0.09985185185185186\n",
-      "Epoch 77: 0.09985185185185186\n",
-      "Epoch 78: 0.09985185185185186\n",
-      "Epoch 79: 0.09985185185185186\n",
-      "Epoch 80: 0.09985185185185186\n",
-      "Epoch 81: 0.09985185185185186\n",
-      "Epoch 82: 0.09985185185185186\n",
-      "Epoch 83: 0.09985185185185186\n",
-      "Epoch 84: 0.09985185185185186\n",
-      "Epoch 85: 0.09985185185185186\n",
-      "Epoch 86: 0.09985185185185186\n",
-      "Epoch 87: 0.09985185185185186\n",
-      "Epoch 88: 0.09985185185185186\n",
-      "Epoch 89: 0.09985185185185186\n",
-      "Epoch 90: 0.09985185185185186\n",
-      "Epoch 91: 0.09985185185185186\n",
-      "Epoch 92: 0.09985185185185186\n",
-      "Epoch 93: 0.09985185185185186\n",
-      "Epoch 94: 0.09985185185185186\n",
-      "Epoch 95: 0.09985185185185186\n",
-      "Epoch 96: 0.09985185185185186\n",
-      "Epoch 97: 0.09985185185185186\n",
-      "Epoch 98: 0.09985185185185186\n",
-      "Epoch 99: 0.09985185185185186\n",
-      "Epoch 100: 0.09985185185185186\n",
-      "Test accuracy: 0.10133333333333333\n"
+      "Epoch: 1 / 10\n",
+      "Epoch: 2 / 10\n",
+      "Epoch: 3 / 10\n",
+      "Epoch: 4 / 10\n",
+      "Epoch: 5 / 10\n",
+      "Epoch: 6 / 10\n",
+      "Epoch: 7 / 10\n",
+      "Epoch: 8 / 10\n",
+      "Epoch: 9 / 10\n",
+      "Epoch: 10 / 10\n",
+      "Epoch 0: 0.0857037037037037\n",
+      "Epoch 1: 0.08585185185185185\n",
+      "Epoch 2: 0.0865\n",
+      "Epoch 3: 0.08796296296296297\n",
+      "Epoch 4: 0.09007407407407407\n",
+      "Epoch 5: 0.0915\n",
+      "Epoch 6: 0.09418518518518519\n",
+      "Epoch 7: 0.09607407407407408\n",
+      "Epoch 8: 0.09666666666666666\n",
+      "Epoch 9: 0.0979074074074074\n",
+      "Epoch 10: 0.09770370370370371\n",
+      "Test accuracy: 0.10083333333333333\n"
      ]
     }
    ],
    "source": [
     "training_accuracy_values, test_accuracy = run_mlp_training(\n",
-    "    train_data_09,\n",
+    "    norm_train_data_09,\n",
     "    train_labels_09,\n",
-    "    test_data_09,\n",
+    "    norm_test_data_09,\n",
     "    test_labels_09,\n",
     "    d_h,\n",
     "    learning_rate,\n",
-    "    num_epoch\n",
+    "    100\n",
     ")\n",
     "\n",
     "for (i, training_accuracy) in enumerate(training_accuracy_values):\n",
diff --git a/mlp.py b/mlp.py
index 944df7fc4918ce010493b179bb1911cea67dd881..905d829046240ba2066e487db903bf3c30e2a3e8 100644
--- a/mlp.py
+++ b/mlp.py
@@ -13,7 +13,19 @@ def softmax(x: np.ndarray) -> np.ndarray:
         np.ndarray: The softmax of the given vector.
     """
     e_x = np.exp(x - np.max(x))
-    return e_x / e_x.sum(axis=0)
+    return e_x / (e_x.sum(axis=0) + EPSILON)
+
+
+def sigmoid(x: np.ndarray) -> np.ndarray:
+    """Compute the sigmoid of the given vector.
+
+    Args:
+        x (np.ndarray): The vector to compute the sigmoid of.
+
+    Returns:
+        np.ndarray: The sigmoid of the given vector.
+    """
+    return 1 / (1 + np.exp(-x))
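
A note on the stabilized softmax and the new sigmoid helper above: after the max shift the sum of exponentials is at least 1, so the added EPSILON acts as a belt-and-braces guard rather than a necessity. With `axis=0` the normalization also runs down the columns; for batched 2D inputs of shape (N, d_out), a per-row variant is the usual choice. A sketch under that batched assumption:

    import numpy as np

    def softmax_rows(x: np.ndarray) -> np.ndarray:
        # Shift each row by its own max before exponentiating to avoid overflow,
        # then normalize each row to sum to 1.
        shifted = x - x.max(axis=-1, keepdims=True)
        e_x = np.exp(shifted)
        return e_x / e_x.sum(axis=-1, keepdims=True)

    # Both rows yield the same distribution; the shift keeps row two finite.
    print(softmax_rows(np.array([[1.0, 2.0, 3.0], [1000.0, 1001.0, 1002.0]])))
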
 
 
 def learn_once_mse(
@@ -43,13 +55,9 @@ def learn_once_mse(
     # Forward pass
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-    a1 = 1 / (
-        1 + np.exp(-z1)
-    )  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = 1 / (
-        1 + np.exp(-z2)
-    )  # output of the output layer (sigmoid activation function)
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
 
     # Compute loss (MSE)
@@ -58,7 +66,7 @@ def learn_once_mse(
     # Backward pass
 
     # Compute gradients
-    dC_da2 = 2 * (predictions - targets) / predictions.shape[0]
+    dC_da2 = 2 * (predictions - targets) / data.shape[0]
     dC_dz2 = dC_da2 * a2 * (1 - a2)
     dC_dw2 = np.matmul(a1.T, dC_dz2)
     dC_db2 = np.sum(dC_dz2, axis=0)
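
The `dC_da2` change above swaps `predictions.shape[0]` for `data.shape[0]`; both equal the batch size N, so the new name only makes the batch-averaging intent explicit. For a loss C = (1/N) * sum((a2 - y)**2), the gradient is dC/da2 = 2 * (a2 - y) / N (the file's `np.mean` loss also averages over the output dimension, a constant factor the learning rate absorbs). A finite-difference check under that batch-mean assumption:

    import numpy as np

    rng = np.random.default_rng(0)
    a2 = rng.random((5, 3))
    y = rng.random((5, 3))
    N = a2.shape[0]

    def loss(p: np.ndarray) -> float:
        # Squared error summed over outputs, averaged over the batch.
        return float(np.sum((p - y) ** 2) / N)

    analytic = 2 * (a2 - y) / N
    eps = 1e-6
    numeric = np.zeros_like(a2)
    for idx in np.ndindex(a2.shape):
        bumped = a2.copy()
        bumped[idx] += eps
        numeric[idx] = (loss(bumped) - loss(a2)) / eps

    print(np.allclose(analytic, numeric, atol=1e-4))  # True
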
@@ -115,26 +123,24 @@ def learn_once_cross_entropy(
     # Forward pass
     a0 = data
     z1 = np.matmul(a0, w1) + b1
-    a1 = 1 / (1 + np.exp(-z1))
+    a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
     a2 = softmax(z2)
 
     predictions = a2
 
-    one_hot_targets = one_hot(labels_train)
-
     # Compute loss (Cross Entropy)
     # https://arize.com/blog-course/binary-cross-entropy-log-loss/
     loss = -np.mean(
-        one_hot_targets * np.log(predictions + EPSILON)
-        + (1 - one_hot_targets) * np.log(1 - predictions + EPSILON)
+        labels_train * np.log(predictions + EPSILON)
+        + (1 - labels_train) * np.log(1 - predictions + EPSILON)
     )
 
     # Backward pass
 
     # Compute gradients
 
-    dC_dz2 = a2 - one_hot_targets
+    dC_dz2 = (a2 - labels_train) / data.shape[0]
     dC_dw2 = np.matmul(a1.T, dC_dz2)
     dC_db2 = np.sum(dC_dz2, axis=0)
     dC_da1 = np.matmul(dC_dz2, w2.T)
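
The cross-entropy hunk above now expects pre-encoded one-hot labels and divides the output-layer gradient by the batch size. For the categorical cross-entropy C = -(1/N) * sum(y * log(softmax(z))), the softmax Jacobian collapses so that dC/dz2 = (a2 - y) / N holds exactly; the loss actually printed in this file also carries (1 - y) * log(1 - p) terms, for which `(a2 - labels_train) / data.shape[0]` is the standard softmax cross-entropy gradient rather than the exact derivative. A numeric check under the categorical assumption:

    import numpy as np

    def softmax_rows(z: np.ndarray) -> np.ndarray:
        e = np.exp(z - z.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    rng = np.random.default_rng(1)
    z = rng.random((4, 3))
    y = np.eye(3)[[0, 2, 1, 0]]  # one-hot targets
    N = z.shape[0]

    def loss(logits: np.ndarray) -> float:
        return float(-np.sum(y * np.log(softmax_rows(logits))) / N)

    analytic = (softmax_rows(z) - y) / N
    eps = 1e-6
    numeric = np.zeros_like(z)
    for idx in np.ndindex(z.shape):
        bumped = z.copy()
        bumped[idx] += eps
        numeric[idx] = (loss(bumped) - loss(z)) / eps

    print(np.allclose(analytic, numeric, atol=1e-4))  # True
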
@@ -181,10 +187,14 @@ def train_mlp(
     accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
     accuracies = [accuracy]
 
-    for _ in range(num_epoch):
+    one_hot_labels = one_hot(labels_train)
+
+    for epoch in range(num_epoch):
         # Train once
+        print("Epoch:", epoch + 1, "/", num_epoch)
+
         w1, b1, w2, b2, _ = learn_once_cross_entropy(
-            w1, b1, w2, b2, data_train, labels_train, learning_rate
+            w1, b1, w2, b2, data_train, one_hot_labels, learning_rate
         )
 
         # Compute current model training accuracy
@@ -218,7 +228,7 @@ def test_mlp(
     # Forward pass
     a0 = data_test
     z1 = np.matmul(a0, w1) + b1
-    a1 = 1 / (1 + np.exp(-z1))
+    a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
     a2 = softmax(z2)
     predictions = a2
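
The refactored forward pass in `test_mlp` now mirrors the training pass: a `sigmoid` hidden layer followed by a `softmax` output. The accuracy computation itself sits outside this hunk; the usual argmax comparison would look like the sketch below, where every shape (784 inputs, 64 hidden units, 10 classes) is an assumption chosen for illustration:

    import numpy as np

    def sigmoid(x: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-x))

    def softmax_rows(z: np.ndarray) -> np.ndarray:
        e = np.exp(z - z.max(axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    rng = np.random.default_rng(2)
    w1 = rng.normal(scale=0.1, size=(784, 64))
    b1 = np.zeros(64)
    w2 = rng.normal(scale=0.1, size=(64, 10))
    b2 = np.zeros(10)
    data_test = rng.random((16, 784))
    labels_test = rng.integers(0, 10, size=16)

    # Two-layer forward pass, then accuracy as the fraction of argmax hits.
    a1 = sigmoid(data_test @ w1 + b1)
    a2 = softmax_rows(a1 @ w2 + b2)
    accuracy = np.mean(np.argmax(a2, axis=1) == labels_test)
    print(accuracy)
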
diff --git a/results/mlp.png b/results/mlp.png
index 7adf3430932d16a46e17db300e3a1335a7211aac..de1a6be9de2e259aaaea5d0553b55697bf7bb139 100644
Binary files a/results/mlp.png and b/results/mlp.png differ