diff --git a/final_run b/final_run
new file mode 100644
index 0000000000000000000000000000000000000000..46dc7567b649f0425e7d6e38f3a4204efaae0e8b
--- /dev/null
+++ b/final_run
@@ -0,0 +1,10 @@
+from read_cifar import read_cifar, split_dataset
+from mlp import run_mlp_training
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+    data, labels = read_cifar()
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
+    list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
+    plt.plot(list_accuracies)
+    plt.xlabel("epoch")
+    plt.ylabel("training accuracy")
+    plt.savefig("accuracies_mlp_network.png")
diff --git a/knn.py b/knn.py
index b39e134fd27b472ae240e9c9f7b7f6dc208fd58c..25870cf3ef068b41fafd44b32195c3fa3d812fb5 100644
--- a/knn.py
+++ b/knn.py
@@ -12,7 +12,7 @@ def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
     return dists
 
 def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k:int):
-    labels_predicts = np.zeros(np.size(dist, 0))
+    labels_predicts = np.zeros(np.size(dists, 0))
     for i in range(np.size(labels_predicts, 0)):
         # Extract the indices of the k smallest distances (the k nearest neighbors)
         k_neighbors_index = np.argsort(dists[i, :])[:k]
diff --git a/mlp.py b/mlp.py
index 92a28be21705e6c026265e879d790234c3c39204..f37afc7bf6ed77a31494f808bc670cdb31f4e76d 100644
--- a/mlp.py
+++ b/mlp.py
@@ -2,7 +2,16 @@ import numpy as np
 import math
 
 def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float):
-
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data: input vector of the network.
+    :targets: output vector to reach.
+    :learning_rate: step size for the gradient descent update (how fast the descent proceeds).
+    :return: updated weights and biases of the network after one gradient descent step, and the loss value.
+    """
     # Forward pass
     N = np.size(data, 0)
     a0 = data # the data are the input of the first layer
@@ -14,7 +23,6 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
 
     # Compute loss (MSE)
     loss = np.mean((predictions - targets)**2)
-    print(loss)
 
     #Compute gradient dW
     da2 = 2/N*(a2-targets)
@@ -22,7 +30,7 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
     dw2 = np.matmul(a1.T, dz2)
     db2 = np.sum(dz2, axis=0, keepdims=True)
     da1 = np.matmul(dz2, w2.T)
-    dz1 = da1*a1*(1*a1)
+    dz1 = da1*a1*(1-a1)
     dw1 = np.matmul(a0.T, dz1)
     db1 = np.sum(dz1, axis=0, keepdims=True)
 
@@ -33,3 +41,161 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
 
     
     return w1, b1, w2, b2, loss
+
+
+def one_hot(label: np.ndarray):
+    """
+    One-hot encode a vector of integer class labels.
+
+    :label: the vector of integer labels to encode.
+    :return: the encoded matrix of shape (n_samples, n_classes).
+    """
+    n_classes = int(np.max(label)) + 1
+    result = np.zeros((np.size(label, 0), n_classes))
+    result[np.arange(np.size(label, 0)), label] = 1
+    return result
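+
+# A quick sanity check for one_hot (a sketch, assuming integer class labels that
+# start at 0, as in CIFAR-10):
+#   one_hot(np.array([2, 0, 1])) should give
+#   [[0., 0., 1.],
+#    [1., 0., 0.],
+#    [0., 1., 0.]]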
+
+def convert_integer_to_binary(integer, size):
+    """
+    Convert an integer into a binary vector with a specified size.
+
+    :integer: Integer to convert to binary.
+    :size: Size of the binary vector.
+    :return: The converted binary vector.
+    """
+    binary = []
+    while integer > 0:
+        binary.insert(0, integer % 2)
+        integer //= 2
+
+    # Fill with zero on the left if necessary to reach the specified size
+    while len(binary) < size:
+        binary.insert(0, 0)
+
+    return np.array(binary)
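+
+# For reference, a small example (a sketch): convert_integer_to_binary(5, 4)
+# returns array([0, 1, 0, 1]), i.e. 5 written in binary and left-padded to length 4.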
+
+
+def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data: input vector of the network.
+    :labels_train: output vector for the training of the network.
+    :learning_rate: step size for the gradient descent update (how fast the descent proceeds).
+    :return: updated weights and biases of the network after 1 loop of gradient descent, and the loss value.
+    """
+    
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+
+    # Backward pass: for a cross-entropy loss with a sigmoid output layer,
+    # the gradient with respect to the output pre-activation is (a2 - y) / N
+    N = np.size(data, 0)
+    encoded_vector = one_hot(labels_train)
+    dz2 = (a2 - encoded_vector) / N
+    dw2 = np.matmul(a1.T, dz2)
+    db2 = np.sum(dz2, axis=0, keepdims=True)
+    da1 = np.matmul(dz2, w2.T)
+    dz1 = da1*a1*(1-a1)
+    dw1 = np.matmul(a0.T, dz1)
+    db1 = np.sum(dz1, axis=0, keepdims=True)
+
+    w1 -= learning_rate*dw1
+    w2 -= learning_rate*dw2
+    b1 -= learning_rate*db1
+    b2 -= learning_rate*db2
+
+    loss = (-1/N) * np.sum(encoded_vector * np.log(a2) + (1 - encoded_vector) * np.log(1 - a2))
+
+    return w1, b1, w2, b2, loss
+
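+# Note (a hedged sketch of the simplification used above): with the cross-entropy
+# loss L = -(1/N) * sum(y*log(a2) + (1-y)*log(1-a2)) and the sigmoid output
+# a2 = sigmoid(z2), the chain rule gives
+#   dL/da2 = (a2 - y) / (N * a2 * (1 - a2))   and   da2/dz2 = a2 * (1 - a2),
+# so their product collapses to dL/dz2 = (a2 - y) / N, the expression used in
+# learn_once_cross_entropy.
+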
+def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_train: np.ndarray, labels_train: np.ndarray, learning_rate: float, num_epoch: int):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data_train: input training vector.
+    :labels_train: output training vector.
+    :learning_rate: step size for the gradient descent update (how fast the descent proceeds).
+    :num_epoch: number of training loops (gradient descent).
+    :return: updated weights and biases of the network after num_epoch gradient descent steps, and the training accuracy at each epoch.
+    """
+    accuracies = []
+    for epoch in range(num_epoch):
+        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Forward pass with the updated weights to measure the training accuracy
+        a0 = data_train
+        z1 = np.matmul(a0, w1) + b1
+        a1 = 1 / (1 + np.exp(-z1))
+        z2 = np.matmul(a1, w2) + b2
+        a2 = 1 / (1 + np.exp(-z2))
+        accuracies.append(compute_accuracy(a2, labels_train))
+    return w1, b1, w2, b2, accuracies
+
+def compute_accuracy(y_predict, y_target):
+    """
+    :y_predict: network outputs of shape (N, n_classes).
+    :y_target: integer labels of shape (N,).
+    :return: the fraction of samples whose predicted class matches the target.
+    """
+    # The predicted class is the index of the highest output score
+    predicted_labels = np.argmax(y_predict, axis=1)
+    return np.mean(predicted_labels == y_target)
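+
+# A quick hedged check of compute_accuracy: with two samples whose highest scores
+# fall on classes 1 and 0 respectively,
+#   compute_accuracy(np.array([[0.1, 0.9], [0.8, 0.2]]), np.array([1, 0])) == 1.0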
+
+def test_mlp(w1: np.ndarray, b1: np.ndarray, w2:np.ndarray, b2:np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data_test: input testing vector.
+    :labels_test: output testing vector.
+    :return: the accuracy of the test.
+    """
+
+    # Forward pass on the test data with the already-trained weights
+    a0 = data_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    y_predict = 1 / (1 + np.exp(-z2))
+    test_accuracy = compute_accuracy(y_predict, labels_test)
+
+    return test_accuracy
+
+def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:np.ndarray, labels_test:np.ndarray, d_h: int, learning_rate: float, num_epoch: int):
+    """
+    :data_train: input training vector.
+    :labels_train: output training vector.
+    :data_test: input testing vector.
+    :labels_test: output testing vector.
+    :d_h: number of neurons on the hidden layer.
+    :learning_rate: step size for the gradient descent update (how fast the descent proceeds).
+    :num_epoch: number of training loops (gradient descent).
+    :return: the training accuracies across epochs as a list of floats and the final testing accuracy as a float.
+    """
+    # Number of neurons on the first and the last layer
+    d_in = np.size(data_train, 1)
+    d_out = int(np.max(labels_train)) + 1  # number of classes (10 for CIFAR-10)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return list_accuracies, final_accuracy
+
+
+
+
diff --git a/read_cifar.py b/read_cifar.py
index df7393ff90a0274df9ffd8c6cd2f753ff5013e3b..8e56b8764d61e33b4650149dd165a14adec46e3c 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -51,4 +51,3 @@ def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):
 if __name__ == "__main__":
     data, labels = read_cifar()
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)
-    print(1)