diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..68bc17f9ff2104a9d7b6777058bb4c343ca72609
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/data/cifar-10-batches-py/data_batch_1 b/data/cifar-10-batches-py/data_batch_1
new file mode 100644
index 0000000000000000000000000000000000000000..ab404a5ac32492b807a5c6cd02b83dc4dd5ff980
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_1 differ
diff --git a/data/cifar-10-batches-py/data_batch_2 b/data/cifar-10-batches-py/data_batch_2
new file mode 100644
index 0000000000000000000000000000000000000000..6bf1369a6cacadfdbd2f8c61e354cc7d0c17bbae
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_2 differ
diff --git a/data/cifar-10-batches-py/data_batch_3 b/data/cifar-10-batches-py/data_batch_3
new file mode 100644
index 0000000000000000000000000000000000000000..66a0d630a7eb736563b1861ce716bdc489f2113b
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_3 differ
diff --git a/data/cifar-10-batches-py/data_batch_4 b/data/cifar-10-batches-py/data_batch_4
new file mode 100644
index 0000000000000000000000000000000000000000..cf8d03d1e80e6d9e440d1764faa85aedd1d6b960
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_4 differ
diff --git a/data/cifar-10-batches-py/data_batch_5 b/data/cifar-10-batches-py/data_batch_5
new file mode 100644
index 0000000000000000000000000000000000000000..468b2aa538c551bc9f590f213b19d96915b85062
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_5 differ
diff --git a/data/cifar-10-batches-py/test_batch b/data/cifar-10-batches-py/test_batch
new file mode 100644
index 0000000000000000000000000000000000000000..3e03f1fc5261d102600fc1c130454f1f5cda567b
Binary files /dev/null and b/data/cifar-10-batches-py/test_batch differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bf127dfe535a0000c5b171df1a0e23ea9035d4f
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,124 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import read_cifar
+import os
+
+def distance_matrix(matrix1, matrix2):
+    """Return the pairwise Euclidean distances between the rows of two matrices.
+
+    The squared distance is expanded as ||a||^2 - 2 a.b + ||b||^2, so the
+    whole matrix comes out of a single matrix product. This brute-force
+    pixel-wise distance is not the most effective image metric, but it
+    gives a first baseline.
+
+    Args:
+        matrix1: 2-D array of shape (N, D), one sample per row.
+        matrix2: 2-D array of shape (M, D), one sample per row.
+
+    Returns:
+        Array of shape (N, M); entry (i, j) is the Euclidean distance
+        between matrix1[i] and matrix2[j].
+    """
+    sq_norms1 = np.sum(matrix1**2, axis=1, keepdims=True)  # shape (N, 1)
+    sq_norms2 = np.sum(matrix2**2, axis=1, keepdims=True)  # shape (M, 1)
+    cross = np.dot(matrix1, matrix2.T)  # shape (N, M)
+
+    sq_dists = sq_norms1 - 2 * cross + sq_norms2.T
+
+    # Rounding can push the squared distance of (nearly) identical rows
+    # slightly below zero; clamp so that sqrt never produces NaN.
+    return np.sqrt(np.maximum(sq_dists, 0))
+
+# Quick smoke test: two small 3x3 example matrices
+matrix1 = np.arange(1, 10).reshape(3, 3)
+matrix2 = np.arange(10, 19).reshape(3, 3)
+
+# Pairwise distances between the rows of the two examples
+# (uncomment the print below to inspect them)
+dists = distance_matrix(matrix1, matrix2)
+
+##print(dists)
+
+def knn_predict(dists, labels_train, k):
+    """Predict a label for each test sample by majority vote of its k nearest neighbours.
+
+    For every row of the distance matrix the k smallest distances are
+    selected, and the label that occurs most often among the corresponding
+    training samples is returned.
+
+    Args:
+        dists: 2-D array of shape (n_test, n_train); dists[i, j] is the
+            distance between test sample i and training sample j.
+        labels_train: 1-D sequence of non-negative integer labels, one per
+            training sample.
+        k: number of neighbours taking part in the vote, 1 <= k <= n_train.
+
+    Returns:
+        1-D array with one predicted label per test sample. When several
+        labels are tied, the smallest label wins.
+
+    Raises:
+        ValueError: if k is not in the range [1, n_train].
+    """
+    dists = np.asarray(dists)
+    labels_train = np.asarray(labels_train)
+
+    n_train = dists.shape[1]
+    if not 1 <= k <= n_train:
+        raise ValueError(f"k must be between 1 and {n_train}, got {k}")
+
+    # argpartition's kth argument is a zero-based position: partitioning
+    # around k - 1 puts the k smallest distances in the first k columns and
+    # stays valid when k equals the number of training samples.
+    nearest_indices = np.argpartition(dists, k - 1, axis=1)[:, :k]
+
+    # Labels of the k nearest neighbours, shape (n_test, k)
+    nearest_labels = labels_train[nearest_indices]
+
+    # bincount counts the votes per label; argmax picks the most frequent one
+    return np.array([np.bincount(row).argmax() for row in nearest_labels])
+
+def evaluate_knn(data_train,labels_train,data_test,labels_test,k):
+    """Return the classification rate (in percent) of k-NN on a test set.
+
+    The classification rate is the number of correctly classified test
+    samples divided by the total number of test samples. The training set
+    serves as the neighbour pool and the test set is used for checking.
+    """
+    # Distances from every test sample to every training sample
+    test_to_train = distance_matrix(data_test, data_train)
+
+    predicted_labels = knn_predict(test_to_train, labels_train, k)
+
+    # Number of predictions that agree with the ground truth
+    hits = sum(1 for guess, truth in zip(predicted_labels, labels_test) if guess == truth)
+
+    return hits / len(labels_test) * 100
+
+if __name__ == "__main__":
+    # Script entry point: plot k-NN accuracy on one CIFAR batch for k = 1..20.
+    data_folder = 'data/cifar-10-batches-py'
+    batch_filename = 'data_batch_1' # Adjust this to the specific batch file you want to read
+
+    batch_path = os.path.join(data_folder, batch_filename)
+
+    data, labels = read_cifar.read_cifar_batch(batch_path)
+    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
+    print(len(data_train),len(data_test))
+
+    # The distance matrix does not depend on k, so compute it once and
+    # reuse it for every value of k instead of rebuilding it each time.
+    dists = distance_matrix(data_test, data_train)
+
+    k_values = list(range(1, 21))
+    accuracies = [
+        np.mean(knn_predict(dists, labels_train, k) == labels_test) * 100
+        for k in k_values
+    ]
+
+    # Create a plot of accuracy vs. k values
+    plt.figure(figsize=(10, 6))
+    plt.plot(k_values, accuracies, marker='o', linestyle='-', color='b')
+    plt.title('Accuracy vs. k for k-Nearest Neighbors')
+    plt.xlabel('k (Number of Neighbors)')
+    plt.ylabel('Accuracy (%)')
+    plt.grid(True)
+
+    # savefig does not create missing directories, so make sure it exists
+    os.makedirs('results', exist_ok=True)
+    plt.savefig('results/knn.png')
+
+    # Show the plot (optional)
+    plt.show()
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..22008e84e745c42b77a12ef0237a5d4de3b1bbd1
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,166 @@
+import numpy as np
+from sklearn.metrics import accuracy_score
+import read_cifar
+import matplotlib.pyplot as plt
+import os
+
+# Sizes of the toy problem: number of samples, input dimension,
+# hidden-layer neurons and output-layer neurons
+N, d_in, d_h, d_out = 30, 2, 3, 1
+
+# Weights are drawn uniformly from [-1, 1); biases start at zero
+w1 = np.random.rand(d_in, d_h) * 2 - 1  # first layer weights
+b1 = np.zeros((1, d_h))  # first layer biases
+w2 = np.random.rand(d_h, d_out) * 2 - 1  # second layer weights
+b2 = np.zeros((1, d_out))  # second layer biases
+
+# Random inputs and random targets to exercise the network
+data = np.random.rand(N, d_in)
+targets = np.random.rand(N, d_out)
+
+def sigmoid(z):
+    """Element-wise logistic function 1 / (1 + exp(-z)).
+
+    The input is clipped to [-30, 30] first so that np.exp cannot overflow.
+    """
+    bounded = np.clip(z, -30, 30)
+    return 1 / (1 + np.exp(-bounded))
+
+def forward_pass(data, w1, b1, w2, b2):
+    """Forward pass of the two-layer network with sigmoid on both layers.
+
+    Returns the tuple (a0, z1, a1, z2, a2, predictions): the input, the
+    pre-activation and activation of the hidden layer, the pre-activation
+    and activation of the output layer, and the predictions (which are the
+    output-layer activations).
+    """
+    hidden_in = np.matmul(data, w1) + b1  # input of the hidden layer
+    hidden_out = sigmoid(hidden_in)  # output of the hidden layer
+    output_in = np.matmul(hidden_out, w2) + b2  # input of the output layer
+    output = sigmoid(output_in)  # output of the output layer
+    return (data, hidden_in, hidden_out, output_in, output, output)
+
+def mse(predictions,targets):
+    """Mean squared error, averaged over every element of the inputs."""
+    residuals = predictions - targets
+    return np.square(residuals).mean()
+
+def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
+    """Perform one gradient-descent step of the sigmoid network on the MSE loss.
+
+    Args:
+        w1, b1: weights and biases of the hidden layer.
+        w2, b2: weights and biases of the output layer.
+        data: input matrix, one sample per row.
+        targets: target matrix with the same shape as the network output.
+        learning_rate: step size of the gradient descent.
+
+    Returns:
+        (w1, b1, w2, b2, loss): the updated parameters, returned as new
+        arrays, and the MSE loss measured before the update.
+    """
+    a0,z1,a1,z2,a2,predictions = forward_pass(data, w1, b1, w2, b2)
+    loss = mse(predictions,targets)
+
+    # Backpropagation.
+    # mse() averages over every element of `targets`, so the derivative of
+    # the loss w.r.t. a2 is 2 * (a2 - targets) / targets.size; the factor
+    # a2 * (1 - a2) is the derivative of the sigmoid.
+    grad_z2 = 2 / targets.size * (a2 - targets) * a2 * (1 - a2)
+    grad_w2 = np.matmul(a1.T, grad_z2)
+    grad_b2 = np.sum(grad_z2, axis=0, keepdims=True)
+    grad_a1 = np.matmul(grad_z2, w2.T)
+    grad_z1 = grad_a1 * a1 * (1 - a1)
+    grad_w1 = np.matmul(data.T, grad_z1)
+    grad_b1 = np.sum(grad_z1, axis=0, keepdims=True)
+
+    # Build new arrays instead of updating in place so that the arrays
+    # passed in by the caller are left untouched.
+    w1 = w1 - learning_rate * grad_w1
+    b1 = b1 - learning_rate * grad_b1
+    w2 = w2 - learning_rate * grad_w2
+    b2 = b2 - learning_rate * grad_b2
+
+    return w1, b1, w2, b2, loss
+
+
+def forward(data, w1, b1, w2, b2):
+    """Forward pass with a sigmoid hidden layer and a softmax output layer.
+
+    Returns the tuple (a0, z1, a1, z2, a2, predictions): the input, the
+    pre-activation and activation of the hidden layer, the pre-activation
+    and activation of the output layer, and the predictions (which are the
+    output-layer activations).
+    """
+    hidden_in = np.matmul(data, w1) + b1  # input of the hidden layer
+    hidden_out = sigmoid(hidden_in)  # output of the hidden layer (sigmoid)
+    output_in = np.matmul(hidden_out, w2) + b2  # input of the output layer
+    output = softmax_stable(output_in)  # output of the output layer (softmax)
+    return (data, hidden_in, hidden_out, output_in, output, output)
+
+def one_hot(labels, num_classes=None):
+    """Encode integer class labels as one-hot rows.
+
+    Args:
+        labels: array or sequence of non-negative integer labels.
+        num_classes: width of the encoding. Defaults to max(labels) + 1;
+            pass it explicitly when the labels may not contain the highest
+            class, so that the width stays fixed.
+
+    Returns:
+        Array with one extra trailing axis of length num_classes, holding
+        a 1 at the position of each label and 0 elsewhere.
+    """
+    labels = np.asarray(labels)
+    if num_classes is None:
+        num_classes = np.max(labels) + 1
+    # Row i of the identity matrix is the one-hot code of class i
+    return np.eye(num_classes)[labels]
+
+def softmax_stable(x):
+    """Numerically stable softmax along the last axis.
+
+    Each row (or the whole vector for 1-D input) is normalised on its own
+    so that it sums to 1. The row maximum is subtracted before
+    exponentiating, which leaves the result unchanged but keeps np.exp
+    from overflowing on large inputs.
+
+    Args:
+        x: array of scores; for a batch, one sample per row.
+
+    Returns:
+        Array of the same shape as x containing the softmax probabilities.
+    """
+    x = np.asarray(x)
+    exps = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return exps / np.sum(exps, axis=-1, keepdims=True)
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    """Perform one gradient-descent step on the categorical cross-entropy loss.
+
+    Args:
+        w1, b1: weights and biases of the hidden layer.
+        w2, b2: weights and biases of the output layer; w2 has one column
+            per class.
+        data: input matrix, one sample per row.
+        labels_train: integer class label of each row of `data`.
+        learning_rate: step size of the gradient descent.
+
+    Returns:
+        (w1, b1, w2, b2, loss): the updated parameters, returned as new
+        arrays, and the mean cross-entropy measured before the update.
+    """
+    a0,z1,a1,z2,a2,predictions = forward(data, w1, b1, w2, b2)
+
+    N = len(labels_train)
+
+    # One-hot targets with one column per output neuron, so the shape
+    # matches a2 even when the batch does not contain the highest class.
+    targets = np.eye(w2.shape[1])[labels_train]
+
+    # With a row-wise softmax output and cross-entropy loss, the gradient
+    # w.r.t. z2 is (a2 - targets); dividing by N matches the mean taken in
+    # the loss below. NOTE(review): relies on forward() ending in a
+    # per-sample softmax - confirm.
+    grad_z2 = (a2 - targets) / N
+
+    # Backpropagation
+    grad_w2 = np.matmul(a1.T, grad_z2)
+    grad_b2 = np.sum(grad_z2, axis=0, keepdims=True)
+    grad_a1 = np.matmul(grad_z2, w2.T)
+    grad_z1 = grad_a1 * a1 * (1 - a1)
+    grad_w1 = np.matmul(data.T, grad_z1)
+    grad_b1 = np.sum(grad_z1, axis=0, keepdims=True)
+
+    # Build new arrays instead of updating in place so that the arrays
+    # passed in by the caller are left untouched.
+    w1 = w1 - learning_rate * grad_w1
+    b1 = b1 - learning_rate * grad_b1
+    w2 = w2 - learning_rate * grad_w2
+    b2 = b2 - learning_rate * grad_b2
+
+    # A tiny epsilon inside the log avoids log(0) when a probability vanishes
+    epsilon = 1e-9
+    loss = -np.sum(targets * np.log(predictions + epsilon)) / N
+
+    return w1, b1, w2, b2, loss
+
+def predict_class(predictions):
+    """Return, for each row of scores, the index of its largest entry (the predicted class)."""
+    best_index = np.argmax(predictions, axis=1)
+    return best_index
+
+def accuracy(y_true, y_pred):
+    """Return the proportion of predictions in y_pred that equal the expected values in y_true."""
+    matches = (y_true == y_pred)
+    return np.mean(matches)
+
+def train_mlp(w1,b1,w2,b2, data_train, labels_train, learning_rate, num_epoch):
+    """Train the network for num_epoch cross-entropy steps.
+
+    After every step the training set is passed through the updated network
+    and the resulting accuracy is recorded.
+
+    Returns:
+        (w1, b1, w2, b2, train_accuracies): the final parameters and the
+        list of training accuracies, one per epoch.
+    """
+    train_accuracies = []
+    for _ in range(num_epoch):
+        w1, b1, w2, b2, _loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+        # Index 4 of the forward() tuple is the output-layer activation a2
+        outputs = forward(data_train, w1, b1, w2, b2)[4]
+        train_accuracies.append(accuracy(labels_train, predict_class(outputs)))
+    return (w1,b1,w2,b2, train_accuracies)
+
+def test_mlp(w1,b1,w2,b2, data_test,labels_test):
+    """Return the accuracy of the network with the given parameters on the test set."""
+    # Index 4 of the forward() tuple is the output-layer activation a2
+    outputs = forward(data_test, w1, b1, w2, b2)[4]
+    return accuracy(labels_test, predict_class(outputs))
+
+def run_mlp_training(data_train,labels_train,data_test,labels_test,d_h,learning_rate,num_epoch):
+    """Initialise a network with d_h hidden neurons, train it, then test it.
+
+    Returns:
+        (train_accuracies, test_accuracy): the training accuracy recorded
+        after each epoch and the accuracy on the test set after training.
+    """
+    d_in = data_train.shape[1]  # input dimension
+    d_out = np.max(labels_train) + 1  # one output neuron per class
+
+    # Weights are drawn uniformly from [-1, 1); biases start at zero
+    w1 = np.random.rand(d_in, d_h) * 2 - 1
+    b1 = np.zeros((1, d_h))
+    w2 = np.random.rand(d_h, d_out) * 2 - 1
+    b2 = np.zeros((1, d_out))
+
+    w1, b1, w2, b2, train_accuracies = train_mlp(
+        w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
+    )
+    return train_accuracies, test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+
+if __name__ == "__main__":
+    # Script entry point: train the MLP on one CIFAR batch and plot the
+    # training accuracy per epoch.
+    data_folder = 'data/cifar-10-batches-py'
+    batch_filename = 'data_batch_1' # Adjust this to the specific batch file you want to read
+    batch_path = os.path.join(data_folder, batch_filename)
+    data, labels = read_cifar.read_cifar_batch(batch_path)
+    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
+    # 64 hidden neurons, learning rate 0.1, 100 epochs
+    train_accuracies, test_accuracy = run_mlp_training(data_train,labels_train,data_test,labels_test,64,0.1,100)
+    plt.figure(figsize=(12, 4))
+    plt.plot(train_accuracies)
+    plt.xlabel('Epoch')
+    plt.ylabel('Accuracy')
+    plt.title('Training Accuracy')
+    # savefig does not create missing directories, so make sure it exists
+    os.makedirs('results', exist_ok=True)
+    plt.savefig('results/mlp.png')
+    plt.show()
+    
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4bb3b3f66fd15ad40ba62f7bd16d18dabd7b55b
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,74 @@
+import numpy as np
+import pickle
+import os
+
+def read_cifar_batch(batch_path):
+    """Load one pickled batch file.
+
+    Args:
+        batch_path: path of the batch file to read.
+
+    Returns:
+        (data, labels): the b'data' entry as a float32 array and the
+        b'labels' entry as an int64 array.
+    """
+    # NOTE: pickle.load can execute arbitrary code while unpickling; only
+    # open batch files that come from a trusted source.
+    with open(batch_path, 'rb') as handle:
+        batch = pickle.load(handle, encoding='bytes')
+
+    return (
+        np.array(batch[b'data'], dtype=np.float32),
+        np.array(batch[b'labels'], dtype=np.int64),
+    )
+
+def read_cifar(batch_path):
+    """Read every batch file found in a directory.
+
+    Args:
+        batch_path: directory that contains the batch files.
+
+    Returns:
+        (data, labels): two lists with one entry per file, in sorted
+        file-name order; each entry is what read_cifar_batch returns for
+        that file.
+    """
+    data = []
+    labels = []
+
+    # os.listdir returns entries in arbitrary order; sorting makes the
+    # result reproducible from one run (and one machine) to the next.
+    for name in sorted(os.listdir(batch_path)):
+        path = os.path.join(batch_path, name)
+        if not os.path.isfile(path):
+            continue  # a sub-directory cannot be opened as a batch file
+        batch_data, batch_labels = read_cifar_batch(path)
+        data.append(batch_data)
+        labels.append(batch_labels)
+
+    return data, labels
+
+def split_dataset(data, labels, split):
+    """Shuffle a dataset and split it into a training part and a test part.
+
+    Args:
+        data: array of samples, indexed along its first axis.
+        labels: array of labels aligned with `data`.
+        split: fraction of the samples (between 0 and 1) that goes to the
+            training set; the count is rounded down.
+
+    Returns:
+        (data_train, labels_train, data_test, labels_test)
+
+    Raises:
+        ValueError: if split is below 0 or above 1.
+    """
+    if split < 0 or split > 1:
+        raise ValueError("The split parameter must be a float between 0 and 1.")
+
+    total = len(data)
+    cut = int(total * split)  # size of the training set
+
+    # One random permutation drives both data and labels, so each sample
+    # keeps its own label after shuffling.
+    shuffled = np.random.permutation(total)
+    train_idx, test_idx = shuffled[:cut], shuffled[cut:]
+
+    return (data[train_idx], labels[train_idx], data[test_idx], labels[test_idx])
+
+
+if __name__ == "__main__":
+    # Script entry point: load one batch, then load and print every file
+    # found in the data folder.
+    data_folder = 'data/cifar-10-batches-py'
+    batch_filename = 'data_batch_1'  # Adjust this to the specific batch file you want to read
+    batch_path = os.path.join(data_folder, batch_filename)
+
+    # Single batch
+    data, labels = read_cifar_batch(batch_path)
+
+    # Data and labels for all files from the folder
+    data1, labels1 = read_cifar(data_folder)
+    print("Data :", data1)
+    print("Labels :", labels1)
diff --git a/results/knn.png b/results/knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..587f48dfc222e49ea0d9ccb812fc84f8dbaefd0e
Binary files /dev/null and b/results/knn.png differ
diff --git a/results/mlp.png b/results/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef972fe86b943ddfe2aa46fcb6eb076549dd0378
Binary files /dev/null and b/results/mlp.png differ