diff --git a/.gitignore b/.gitignore
index 5469eefe6ada355ef1a95bbbc42adabc983b580c..b9cc96274fed4ae6040ab52a1df6de36969ef1b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
-DATA
+data
 Env_Deep
diff --git a/__pycache__/knn.cpython-311.pyc b/__pycache__/knn.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a72d35584c1c74976a3fcd277918627ba2303a9c
Binary files /dev/null and b/__pycache__/knn.cpython-311.pyc differ
diff --git a/__pycache__/read_cifar.cpython-311.pyc b/__pycache__/read_cifar.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab2c0562b5bc35efe9e0bad8233915e686e34383
Binary files /dev/null and b/__pycache__/read_cifar.cpython-311.pyc differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..07a3b38e8789538324588af907fc4c28ec0cf0e2
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,90 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+def distance_matrix(train, test):
+    """Return the pairwise Euclidean (L2) distances between train and test rows.
+
+    Args:
+        train: array of shape (n, m), one training sample per row.
+        test: array of shape (p, m), one test sample per row.
+
+    Returns:
+        Array of shape (n, p); entry (i, j) is the distance train[i] <-> test[j].
+    """
+    # ||a - b||^2 = -2 a.b + ||a||^2 + ||b||^2, evaluated for all pairs at once.
+    sq_dists = (-2 * np.matmul(train, test.T)
+                + np.sum(train * train, axis=1, keepdims=True)
+                + np.sum(test * test, axis=1, keepdims=True).T)
+    print('finished calculating dists')
+    # Rounding can push near-zero squared distances below 0; clamp to avoid NaN.
+    return np.sqrt(np.maximum(sq_dists, 0))
+
+def mode(x):
+    """
+    Return the most frequent value of x.
+
+    Args:
+    x: array-like of numbers.
+
+    Returns:
+    The value with the highest count; the smallest such value wins a tie.
+    """
+    values, frequencies = np.unique(x, return_counts=True)
+    return values[frequencies.argmax()]
+
+def knn_predict(dists, labels_train, k):
+    """
+    Predict one label per test sample by majority vote among its k nearest training samples.
+
+    dists has one row per training sample and one column per test sample; the
+    result is a 1-D array of length num_test. Raises ValueError when k < 1.
+    """
+    if k < 1:  # a zero/negative k would slice the sorted neighbours from the wrong end
+        raise ValueError("k must be a positive integer")
+    nearest_idx = np.argsort(dists, axis=0)[:k, :]  # k closest rows for every column
+    nearest_labels = labels_train[nearest_idx]
+
+    return np.array([mode(votes) for votes in nearest_labels.T])
+
+def evaluate_knn_for_k(data_train, labels_train, data_test, labels_test, K_max):
+    """
+    Measure the k-NN accuracy on the test set for every k from 1 to K_max.
+
+    Args:
+    data_train (numpy array): training data
+    labels_train (numpy array): training labels
+    data_test (numpy array): test data
+    labels_test (numpy array): test labels
+    K_max (int): largest number of neighbours to evaluate
+
+    Returns:
+    accuracies (list): accuracies[k - 1] is the fraction of correct predictions for k.
+    """
+    dists = distance_matrix(data_train, data_test)
+    # The neighbour ranking does not depend on k: sort once and reuse its prefixes
+    # instead of re-sorting the whole distance matrix for each k.
+    ranked_labels = labels_train[np.argsort(dists, axis=0)[:K_max, :]]
+    accuracies = []
+    for k in range(1, K_max + 1):
+        labels_pred = np.array([mode(votes) for votes in ranked_labels[:k].T])
+        accuracies.append(np.sum(labels_pred == labels_test) / len(labels_test))
+    return accuracies
+
+def plot_accuracy_versus_k(accuracies, save_path=None):
+    """Plot accuracy against k (k = 1..len(accuracies)) and save the figure.
+
+    save_path is the output file; it defaults to resultats/Knn.png next to this
+    module. Missing parent directories are created before saving.
+    """
+    import os
+    if save_path is None:
+        save_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resultats', 'Knn.png')
+    os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)
+    ks = np.arange(1, len(accuracies) + 1, 1)
+    fig, ax = plt.subplots(figsize=(12, 8))
+    ax.plot(ks, accuracies, 'o-r')
+    ax.set(title="Variation of the accuracy as a function of k",
+           xlabel="k (number of neighbors)", ylabel="Accuracy", xticks=ks)
+    ax.grid(axis='both', which='both')
+    fig.savefig(save_path)
+    plt.close(fig)  # release the figure; pyplot otherwise keeps it alive
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcda22e66e4f61bbe2bc6aef7c9100203c8c7bd4
--- /dev/null
+++ b/main.py
@@ -0,0 +1,13 @@
+import os
+from read_cifar import read_cifar, split_dataset
+from knn import evaluate_knn_for_k, plot_accuracy_versus_k
+
+# Dataset folder resolved next to this script instead of a machine-specific path.
+path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+
+if __name__ == "__main__":
+    split_factor = 0.9  # share of the samples used for training
+    X, y = read_cifar(path)
+    X_train, y_train, X_test, y_test = split_dataset(X, y, split=split_factor)
+    accuracies = evaluate_knn_for_k(X_train, y_train, X_test, y_test, K_max=20)
+    plot_accuracy_versus_k(accuracies)
\ No newline at end of file
diff --git a/read_cifar.py b/read_cifar.py
index ad16d246823fa9bfd2a0b00fe370f6b24c6a1fbb..bc11bac06a81539364b298640d2d88efe0425425 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -1,88 +1,79 @@
-import pickle
 import numpy as np
+import os
+import pickle
 
-#read_cifaar_batch
-def read_cifar_batch(file) :
-    with open(file, 'rb') as fo:
-        dictionnaire = pickle.load(fo, encoding='bytes') 
-        data=dictionnaire[b'data'].astype(np.float32)
-        labels=np.array(dictionnaire[b'labels'],np.int64)
-    return data,labels
-'''
-EXPLICATION DE LA FONCTION:
-read_cifaar_batch function: read the path of a single batch.
-
-Arguments:
-- The path of a single batch as a string, 
-
-Returns: 
-- Matrix data of size (batch_size x data_size)
-- Vector labels of size batch_size
-
-The data must be np.float32 array and labels must be np.int64 array.
-'''
-
-
-
-
-# read_cifar
 
-def read_cifar(folder):
-    batch_file=["data_batch_1","data_batch_2","data_batch_3","data_batch_4","data_batch_5","test_batch"]
-    for i in range(len(batch_file)):
-        path= folder +'/'+batch_file[i]
-        if i==0:
-            data,labels=read_cifar_batch(path)
+def unpickle(file):
+    """Load and return the object pickled in `file` (bytes strings kept as bytes)."""
+    with open(file, 'rb') as fo:  # NOTE: pickle is unsafe on untrusted files
+        return pickle.load(fo, encoding='bytes')
+
+# read_cifar takes the path of the directory containing the data and returns a matrix X of size N x D, where N is the number of available samples and D the dimension of each sample (the number of numeric values describing it), together with a vector Y of size N whose values are the class code of the sample at the same index in X.
+
+# X and Y are NumPy arrays.
+def read_cifar_batch(file):
+    """
+    Read a single CIFAR batch file.
+
+    Arguments:
+    - file: path of a single batch file, as a string.
+
+    Returns:
+    - data: np.float32 matrix of size (batch_size x data_size)
+    - labels: np.int64 vector of size batch_size
+    """
+    batch = unpickle(file)  # mapping indexed with the bytes keys b'data' and b'labels'
+    data = batch[b'data'].astype(np.float32)
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+    labels = labels.reshape(labels.shape[0])  # force a flat vector
+
+    return data, labels
+
+def read_cifar(path):
+    """
+    Read every CIFAR batch of a directory and stack them into one dataset.
+
+    Arguments:
+    - path: directory containing data_batch_1 .. data_batch_5 and test_batch, as a string
+
+    Returns:
+    - np.float32 matrix with the rows of all six batches stacked in reading order
+    - np.int64 vector with the labels of all six batches, in the same order
+    """
+    data_batches = ["data_batch_" + str(i) for i in range(1, 6)] + ['test_batch']
+
+    flag = True  # True until the first batch has been stored
+
+    for db in data_batches:
+        data, labels = read_cifar_batch(os.path.join(path, db))
+        if flag:
+                DATA = data
+                LABELS = labels
+                flag = False
         else:
-            x,y=read_cifar_batch(path)
-            data =np.vstack([data ,x])    # all data for all batches is in variable "data"
-            labels=np.hstack([labels,y])  # All labels for all batches is in variable "labels"
-    return data ,labels
-
-'''
-EXPLICATION DE LA FONCTION:
-read_cifaar function: read the path of the directory containing all batches (including test_batch).
-
-Arguments:
-- the path of the directory containing the six batches (five data_batch and one test_batch) as a string
-
-Returns:
-- Matrix data of size (batch_size x data_size)
-- Vector labels of size batch_size<
-
-The data must be np.float32 array and labels must be np.int64 array.
-'''
-
-
-# split_dataset
-
-def split_dataset(data,labels,split):
-    labels=labels.reshape(data.shape[0],1)
-    # Stack our Data and labels
-    con = np.hstack((data, labels))
-    k=int(split*con.shape[0])
-    # Shuffle all our Data stack it
-    np.random.shuffle(con)
-    # Train
-    X_train=con[:k,:-1]
-    y_train=np.array(con[:k,-1],np.int64)
-    # Test
-    X_test=con[k:,:-1]
-    y_test=np.array(con[k:,-1],np.int64)
-    return X_train,y_train,X_test,y_test
-
-'''
-EXPLICATION DE LA FONCTION:
-split_dataset function: splits the dataset into a training set and a test set.
-
-Arguments:
-- data and labels, two arrays that have the same size in the first dimension.
-- split, a float between 0 and 1 which determines the split factor of the training set with respect to the test set.
-
-Returns:
-- data_train: the training data,
-- labels_train: the corresponding labels,
-- data_test: the testing data, and
-- labels_test: the corresponding labels.
-'''
-
+            DATA = np.concatenate((DATA, data), axis=0, dtype=np.float32)  # append the new rows
+            LABELS = np.concatenate((LABELS, labels), axis=-1, dtype=np.int64)  # append the new labels
+    return DATA, LABELS
+
+def split_dataset(data, labels, split=0.6):
+    """
+    Randomly split a dataset into a training set and a test set.
+
+    Arguments:
+    - data and labels: two arrays that have the same size in the first dimension.
+    - split: float in [0, 1], fraction of the samples assigned to the training set.
+
+    Returns:
+    - data_train, labels_train, data_test, labels_test (np.float32 data, np.int64 labels).
+
+    Raises:
+    - ValueError if split is outside [0, 1] or data and labels differ in length.
+    """
+    n = data.shape[0]
+    if not 0 <= split <= 1 or len(labels) != n:
+        raise ValueError("split must be in [0, 1] and data/labels must have the same length")
+    order = np.random.permutation(n)  # one shared shuffle keeps rows and labels paired
+    cut = int(split * n)
+    train_idx, test_idx = order[:cut], order[cut:]
+    return (data[train_idx, :].astype(np.float32), labels[train_idx].astype(np.int64),
+            data[test_idx, :].astype(np.float32), labels[test_idx].astype(np.int64))
\ No newline at end of file
diff --git a/resultats/Knn.png b/resultats/Knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..926556a3309ceb450d489a8e4e5d4d85b3763aee
Binary files /dev/null and b/resultats/Knn.png differ