From cdb0f72cadffe8fcb94c7443391802dd409e6fed Mon Sep 17 00:00:00 2001
From: Muniz Silva Samuel <samuel.muniz-silva@ecl21.ec-lyon.fr>
Date: Tue, 8 Nov 2022 19:31:34 +0000
Subject: [PATCH] final commit

---
 knn.py | 80 ++++++++++++++++++++++++++++++----------------------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/knn.py b/knn.py
index 6c19361..6dc8f75 100644
--- a/knn.py
+++ b/knn.py
@@ -9,63 +9,65 @@ from sklearn.neighbors import KNeighborsRegressor
 import matplotlib.pyplot as plt
 
 
-def distance_matrix(data_test,data_train):
+def distance_matrix(data_test, data_train):
+    """Take the matrices data_test (N rows) and data_train (M rows) and return a 2D array of shape (N, M)
+    such that dists[i, j] is the Euclidean distance between the i-th data_test row and the j-th data_train row.
+    """
+    dists = np.array([np.sum((data_train - l) ** 2, axis=1) ** 0.5 for l in data_test])
 
-  dists = np.array([np.sum((data_train-l)**2,axis=1)**.5 for l in data_test])
+    return dists
 
-  return dists
-#receives a 2d array data_train(M,k) and a data_test (N,k), 
-#returning a 2d array(N,M) such that dists[i,j] represents 
-#the distance between the i-th data_test row and the j-th data_train row
-#in resume,  each column represent a distance of a training point to all other
 
-def knn_predict(dists , labels_train , k):
-  #classif = np.array(0)
-  print(labels_train[:20])
-  print(labels_train.size)
-  classif = []
+def knn_predict(dists, labels_train, k):
+    """Take the distance matrix dists, the training labels and the number k of nearest neighbors.
+    Return, for each column of dists, the class (0-9) that is most frequent among its k smallest distances.
+    """
+    # classif = np.array(0)
+    print(labels_train[:20])
+    print(labels_train.size)
+    classif = []
 
-  for testRows in dists.T:
-    
-    distances = np.stack((testRows,labels_train),axis = 1)
-    distances = distances[distances[:, 0].argsort()]
-    #for picturesClasses in distances[:k,1]:
-    countArray = [np.count_nonzero(distances[:k,1]==i) for i in range(0,10)]
-    classif = np.append(classif,np.argmax(countArray))     
+    for testRows in dists.T:
 
-  classif = np.array(classif , dtype = int)    
+        distances = np.stack((testRows, labels_train), axis=1)
+        distances = distances[distances[:, 0].argsort()]
+        # for picturesClasses in distances[:k,1]:
+        countArray = [np.count_nonzero(distances[:k, 1] == i) for i in range(0, 10)]
+        classif = np.append(classif, np.argmax(countArray))
 
-  return classif
+    classif = np.array(classif, dtype=int)
 
-def evaluate_knn(data_train,labels_train,data_test,labels_test,k):
+    return classif
 
-  classif = np.array(knn_predict(distance_matrix(data_train,data_test) , labels_train , k)) 
-  result = np.array(classif == labels_test)
-  acc = np.count_nonzero(result) / np.size(result)
 
-  return acc*100
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
+    """Receive the data and labels for training and testing and the number k of nearest neighbors.
+    Return the accuracy of the KNN classifier as a percentage."""
+    classif = np.array(
+        knn_predict(distance_matrix(data_train, data_test), labels_train, k)
+    )
+    result = np.array(classif == labels_test)
+    acc = np.count_nonzero(result) / np.size(result)
 
+    return acc * 100
 
 
-datas,labels = read_cifar_batch('data_batch_1')
-print(datas.shape,labels.shape)
-dataTrain,dataTest,labelsTrain,labelsTest = split_dataset(datas,labels)
-print(dataTrain.shape,dataTest.shape,labelsTrain.shape)
-distanceMatrix = distance_matrix(dataTrain,dataTest)
-print(distanceMatrix.shape)
+datas, labels = read_cifar_batch("data_batch_1")
+dataTrain, dataTest, labelsTrain, labelsTest = split_dataset(datas, labels)
+distanceMatrix = distance_matrix(dataTrain, dataTest)
 print()
 
 result = []
-for i in range (1,21):
-  result = np.append(result,evaluate_knn(dataTrain,labelsTrain,dataTest,labelsTest,i)) 
+for i in range(1, 21):
+    result = np.append(
+        result, evaluate_knn(dataTrain, labelsTrain, dataTest, labelsTest, i)
+    )
 
 x = np.arange(1, 21)
- 
-# plotting
+
+# plot the accuracy (%) as a function of k
 plt.title("Plot graph")
 plt.xlabel("K neighbors")
 plt.ylabel("Accuracy %")
-plt.plot(x, result, color ="red")
+plt.plot(x, result, color="red")
 plt.show()
-
-
-- 
GitLab