diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..6332eddc448d7c139f676123fdab10b02b355f2c
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,58 @@
+import read_cifar
+import numpy as np
+
+def distance_matrix(matrix1, matrix2):
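+    """Compute the pairwise Euclidean distance matrix between the rows of matrix1 and matrix2.
+
+    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so the computation stays vectorised.
+    """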
+    # matrix1 is X_test and matrix2 is X_train, in this order
+    sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)
+    sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)
+
+    dot_product = np.dot(matrix1, matrix2.T)
+    
+    dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)
+    return dists
+
+def knn_predict(dists, labels_train, k):
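+    """Predict a label for each test point by majority vote among its k nearest training points."""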
+    output = []
+    for i in range(len(dists)):
+        votes = [0] * 10  # CIFAR-10 has 10 classes
+        nearest = np.argsort(dists[i])[:k]  # indices of the k closest training points
+        for idx in nearest:
+            votes[labels_train[idx]] += 1
+        label_temp = np.argmax(votes)  # careful: argmax picks the first class with the most votes in case of a tie
+        output.append(label_temp)
+    return np.array(output)
+
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
+    dists = distance_matrix(data_test, data_train)
+    predictions = knn_predict(dists, labels_train, k)
+
+    # Accuracy: fraction of test samples whose predicted label matches the true label
+    N = labels_test.shape[0]
+    accuracy = (labels_test == predictions).sum() / N
+    return accuracy
+
+
+if __name__ == "__main__":
+
+    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
+    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.8)
+    print(evaluate_knn(X_train[:1000], y_train[:1000], X_test, y_test, 5))
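+
+    # Possible extension (sketch only): sweep k and compare accuracies.
+    # The range of k and the subset sizes are arbitrary choices to keep the run time short.
+    # for k in range(1, 21):
+    #     acc = evaluate_knn(X_train[:1000], y_train[:1000], X_test[:1000], y_test[:1000], k)
+    #     print(f'k={k}: accuracy={acc:.3f}')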
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..36635fb25f0d27ad67a7f01951b80867322ffce5
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,254 @@
+import numpy as np
+import read_cifar
+import matplotlib.pyplot as plt
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
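+    """Perform one gradient-descent step on a one-hidden-layer MLP with MSE loss.
+
+    Returns the updated weights and biases along with the loss computed before the update.
+    """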
+
+    N_out = len(targets) #number of training examples
+
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+    print(f'loss: {loss}')
+
+    # Backpropagation
+    delta_a2 = 2 / N_out * (a2 - targets)
+    delta_z2 = delta_a2 * (a2 * (1 - a2))
+    delta_w2 = np.dot(a1.T, delta_z2)
+    delta_b2 = delta_z2
+
+    delta_a1 = np.dot(delta_z2, w2.T)
+    delta_z1 = delta_a1 * (a1 * (1 - a1))
+    delta_w1 = np.dot(a0.T, delta_z1)
+    delta_b1 = delta_z1
+
+    # Update weights and biases
+    w2 -= learning_rate * delta_w2
+    b2 -= learning_rate * np.sum(delta_b2, axis = 0, keepdims = True)
+
+    w1 -= learning_rate * delta_w1
+    b1 -= learning_rate * np.sum(delta_b1, axis = 0, keepdims = True)
+
+    return w1, b1, w2, b2, loss
+
+def one_hot(labels):
+    # np.max(labels) + 1 would also work, but we hard-code 10 classes for CIFAR-10
+    num_classes = 10
+    one_hot_matrix = np.eye(num_classes)[labels]
+    return one_hot_matrix
+
+def softmax_stable(x):
+    # Subtracting the max avoids overflow when exponentiating large values.
+    # The sum is taken per row so each sample gets its own probability distribution.
+    exp_shifted = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)
+
+def cross_entropy_loss(y_pred, y_true):
+    # Mean cross-entropy over the batch; the small epsilon guards against log(0)
+    loss = -np.sum(y_true * np.log(y_pred + 1e-12)) / len(y_pred)
+    return loss
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
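+    """Perform one gradient-descent step with a softmax output layer and cross-entropy loss.
+
+    labels_train holds integer class labels; they are one-hot encoded internally.
+    Returns the updated weights and biases along with the loss value.
+    """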
+
+    N_out = len(labels_train) #number of training examples
+
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+
+    # Compute loss (cross-entropy loss)
+    y_true_one_hot = one_hot(labels_train)
+    loss = cross_entropy_loss(predictions, y_true_one_hot)
+
+
+    # Backpropagation
+    # delta_a2 = 2 / N_out * (a2 - labels_train) is no longer needed here:
+    # with softmax + cross-entropy the gradient w.r.t. z2 simplifies to (a2 - y_true)
+    delta_z2 = (a2 - y_true_one_hot)
+    delta_w2 = np.dot(a1.T, delta_z2) / N_out  # divide by N_out to average the gradient over the batch
+    delta_b2 = delta_z2 / N_out
+
+
+    delta_a1 = np.dot(delta_z2, w2.T)
+    delta_z1 = delta_a1 * (a1 * (1 - a1))
+    delta_w1 = np.dot(a0.T, delta_z1) / N_out
+    delta_b1 = delta_z1 / N_out
+
+    # Update weights and biases
+    w2 -= learning_rate * delta_w2
+    b2 -= learning_rate * np.sum(delta_b2, axis = 0, keepdims = True)
+
+    w1 -= learning_rate * delta_w1
+    b1 -= learning_rate * np.sum(delta_b1, axis = 0, keepdims = True)
+
+    return w1, b1, w2, b2, loss
+
+def learn_once_cross_entropy_2(w1, w2, data, labels_train, learning_rate):
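+    """Variant of learn_once_cross_entropy without bias terms, using sigmoid outputs everywhere."""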
+
+    N_out = len(labels_train) #number of training examples
+
+    # Forward pass (no bias terms in this variant)
+    z1 = np.dot(data, w1)
+    a1 = sigmoid(z1)
+    z2 = np.dot(a1, w2)
+    a2 = sigmoid(z2)
+
+    # Compute loss (cross-entropy loss)
+    y_true_one_hot = one_hot(labels_train)
+    loss = cross_entropy_loss(a2, y_true_one_hot)
+
+    # Backpropagation (deltas are named after the layer whose weights they update)
+    delta_out = (a2 - y_true_one_hot) * a2 * (1 - a2)        # error at the output layer
+    delta_hidden = np.dot(delta_out, w2.T) * a1 * (1 - a1)   # error backpropagated to the hidden layer
+
+    # Update weights
+    w2_update = np.dot(a1.T, delta_out) / N_out
+    w1_update = np.dot(data.T, delta_hidden) / N_out
+    w2 = w2 - learning_rate * w2_update
+    w1 = w1 - learning_rate * w1_update
+    return w1, w2, loss
+
+def forward_2(w1, w2, data):
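+    """Forward pass for the bias-free variant; returns the softmax output probabilities."""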
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1)  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2)  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+    return predictions
+
+def forward(w1, b1, w2, b2, data):
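+    """Run a forward pass through the network and return the softmax output probabilities."""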
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+    return predictions
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
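+    """Train the MLP for num_epoch epochs and return the final weights plus the per-epoch training accuracies."""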
+    train_accuracies = []
+    for epoch in range(num_epoch):
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Compute accuracy
+        predictions = forward(w1, b1, w2, b2, data_train)
+        predicted_labels = np.argmax(predictions, axis=1)
+        accuracy = np.mean(predicted_labels == labels_train)
+        train_accuracies.append(accuracy)
+
+        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
+
+    return w1, b1, w2, b2, train_accuracies
+
+
+def train_mlp_2(w1, w2, data_train, labels_train, learning_rate, num_epoch):
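+    """Training loop for the bias-free variant; mirrors train_mlp but uses learn_once_cross_entropy_2 and forward_2."""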
+    train_accuracies = []
+    for epoch in range(num_epoch):
+        w1, w2, loss = learn_once_cross_entropy_2(w1, w2, data_train, labels_train, learning_rate)
+        # Compute accuracy
+        predictions = forward_2(w1, w2, data_train)
+        predicted_labels = np.argmax(predictions, axis=1)
+        accuracy = np.mean(predicted_labels == labels_train)
+        train_accuracies.append(accuracy)
+
+        print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
+
+    return w1, w2, train_accuracies
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
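+    """Evaluate the trained MLP on the test set and return the test accuracy."""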
+ 
+    # Compute accuracy
+    predictions = forward(w1, b1, w2, b2, data_test)
+    predicted_labels = np.argmax(predictions, axis=1)
+    test_accuracy = np.mean(predicted_labels == labels_test)
+    print(f'Test Accuracy: {test_accuracy:.2f}')
+    return test_accuracy
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
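+    """Initialise a one-hidden-layer MLP with d_h hidden units, train it, and evaluate it on the test set."""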
+
+    d_in = data_train.shape[1]
+    d_out = 10  # hard-coded for CIFAR-10; len(np.unique(labels_train)) would also work
+
+    #Random initialisation of weights
+    w1 = np.random.randn(d_in, d_h)
+    b1 = np.random.randn(1, d_h)
+
+    w2 = np.random.randn(d_h, d_out)
+    b2 = np.random.randn(1, d_out)
+
+    # Train MLP
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+
+    # Test MLP
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+    return train_accuracies, test_accuracy
+
+
+if __name__ == '__main__':
+    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
+    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
+
+    d_in, d_h, d_out = 3072, 728, 10
+    w1 = np.random.normal(scale=0.5, size=(d_in, d_h))
+    b1 = np.random.randn(1, d_h)
+    w2 = np.random.normal(scale=0.5, size=(d_h, d_out))
+    b2 = np.random.randn(1, d_out)
+
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, X_train[:10000], y_train[:10000], 0.1, 100)
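+
+    # Optional sketch: visualise the training curve with the matplotlib import above.
+    plt.plot(train_accuracies)
+    plt.xlabel('epoch')
+    plt.ylabel('train accuracy')
+    plt.title('MLP training accuracy on CIFAR-10')
+    plt.show()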
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f04ee304c076525190a54b8679cf7d2419cbf5
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,57 @@
+import numpy as np
+import pickle
+from sklearn.model_selection import train_test_split
+
+
+
+# Contents of batches.meta, for reference:
+# {b'num_cases_per_batch': 10000, b'label_names': [b'airplane', b'automobile', b'bird', b'cat', b'deer', b'dog', b'frog', b'horse', b'ship', b'truck'], b'num_vis': 3072}
+
+def read_cifar_batch(file):
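+    """Read a single CIFAR-10 batch file and return (data, labels) as numpy arrays."""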
+    with open(file, 'rb') as fo:
+        batch = pickle.load(fo, encoding='bytes')
+    # The unpickled dictionary has keys b'batch_label', b'labels', b'data' and b'filenames'
+    return (np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64'))
+
+def read_cifar(path):
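+    """Load the five CIFAR-10 training batches plus the test batch and concatenate them into a single dataset."""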
+    data = []
+    labels = []
+
+    # Read the 5 training batches
+    for i in range(1, 6):
+        data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
+        data.append(data_temp)
+        labels.append(labels_temp)
+
+    # Read the test batch
+    data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
+    data.append(data_temp)
+    labels.append(labels_temp)
+
+    # Concatenate all the batches into a single dataset
+    data = np.concatenate(data, axis=0)
+    labels = np.concatenate(labels, axis=0)
+
+    return data, labels
+
+def split_dataset(data, labels, split):
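+    """Split (data, labels) into train and test sets; `split` is the fraction of samples kept for training."""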
+    X_train, X_test, y_train, y_test = train_test_split(
+        data, labels, test_size=(1 - split), random_state=0)
+
+    return X_train, X_test, y_train, y_test
+
+
+if __name__ == "__main__":
+    main_path = 'image-classification/data/cifar-10-batches-py'
+    data, labels = read_cifar(main_path)
+    X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.8)
+    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)