diff --git a/mlp.py b/mlp.py
index bfc602bed3a8aeb5ee01b800c58acf0daaff20ff..222225cd81538be3802564086da0e179efaafbbc 100644
--- a/mlp.py
+++ b/mlp.py
@@ -11,16 +11,16 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     # Forward pass
     a0 = data # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
     
     # Compute loss (MSE)
     loss = np.mean(np.square(predictions - targets))
     
     # According to the formulas established by theory :
-    d_a2 = 2 / N * (1 - targets)
+    d_a2 = 2 / N * (a2 - targets)
     d_z2 = d_a2 * a2 * (1 - a2)
     d_w2 = np.matmul(a1.T, d_z2)
-    d_b2 = d_z2
+    d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # sum over the batch (the 1/N is already in d_a2) so d_b2 matches the shape of b2
@@ -46,30 +46,42 @@ def one_hot(labels):
     return one_hot_matrix
 
 
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))  # element-wise sigmoid activation
+
+
+def softmax(x):
+    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))  # subtract the row-wise maximum for numerical stability
+    return e_x / e_x.sum(axis=-1, keepdims=True)  # normalize each row into a probability distribution
+
+
 def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
     N = len(labels_train) # number of training examples
     
     # Forward pass
     a0 = data # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    a2 = softmax(z2)  # output of the output layer (softmax activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
     
     targets_one_hot = one_hot(labels_train) # target as a one-hot encoding for the desired labels
     
     # Cross-entropy loss
-    loss = -np.sum(targets_one_hot * np.log(predictions)) / N
+    epsilon = 1e-5  # small constant to avoid log(0) when a predicted probability is exactly zero
+    loss = -np.sum(targets_one_hot * np.log(predictions + epsilon)) / N
     
     # Backpropagation
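+    # For a softmax output with cross-entropy loss, the gradient of the loss with
+    # respect to z2 reduces to (a2 - targets_one_hot) / N; the 1/N factor is applied below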
     d_z2 = a2 - targets_one_hot
     d_w2 = np.dot(a1.T, d_z2) / N
-    d_b2 = d_z2 / N
+    d_b2 = np.sum(d_z2, axis=0, keepdims=True) / N  # average over the batch so d_b2 matches the shape of b2
     d_a1 = np.dot(d_z2, w2.T)
-    d_z1 = d_a1 * z1 * (1 - a1)
+    d_z1 = d_a1 * a1 * (1 - a1)  # derivative of the sigmoid is a1 * (1 - a1)
     d_w1 = np.dot(a0.T, d_z1) / N
-    d_b1 = d_z1 / N
+    d_b1 = np.sum(d_z1, axis=0, keepdims=True) / N  # same batch average for the hidden-layer bias gradient
     
     # Calculation of the updated weights and biases of the network with gradient descent method
     w1 -= learning_rate * d_w1
@@ -88,9 +100,9 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
         # Forward pass
         a0 = data_train # the data are the input of the first layer
         z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-        a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
         z2 = np.matmul(a1, w2) + b2  # input of the output layer
-        a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+        a2 = softmax(z2)  # output of the output layer (softmax activation function)
         predictions = a2  # the predicted values are the outputs of the output layer
         
         # Find the predicted class
@@ -108,9 +120,9 @@ def test_mlp(w1, b1, w2, b2, data_test, labels_test):
     # Forward pass
     a0 = data_test # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    a2 = softmax(z2)  # output of the output layer (softmax activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
     
     # Find the predicted label
@@ -128,10 +140,10 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
     d_in = data_train.shape[1] # number of input neurons
     d_out = len(np.unique(labels_train)) # number of output neurons = number of classes
     
-    # Random initialization of the network weights and biaises
-    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    # Random initialization of the network weights and biases (Xavier initialization)
+    w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)  # first layer weights
     b1 = np.zeros((1, d_h))  # first layer biaises
-    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)  # second layer weights
     b2 = np.zeros((1, d_out))  # second layer biaises
     
     # Training of the MLP classifier with num_epoch steps
@@ -145,26 +157,28 @@
 
 if __name__ == "__main__":
     
-    split_factor = 0.9
+    # Parameters
+    split_factor = 0.1
     d_h = 64
     learning_rate = 0.1
     num_epoch = 100
     
+    # Extraction and formatting of the data from the CIFAR-10 dataset
     data, labels = read_cifar("./data/cifar-10-batches-py")
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
     
+    # Train the network and collect the accuracies to plot
     epochs = [i for i in range(1, num_epoch + 1)]
-    learning_accuracy = [0] * num_epoch
-    
-    for i in range(num_epoch) :
-        train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, i + 1)
-        learning_accuracy[i] = test_accuracy
+    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
     
-    plt.plot(epochs, learning_accuracy)
+    # Plot the graph
+    plt.close()
+    plt.plot(epochs, train_accuracies)
     plt.title("Evolution of learning accuracy across learning epochs")
     plt.xlabel("number of epochs")
     plt.ylabel("Accuracy")
     plt.grid(True, which='both')
     plt.savefig("results/mlp.png")
+    plt.show()