From 72bb7ee1d99b7365de5ee90770132b7a6b5fb9ae Mon Sep 17 00:00:00 2001
From: lucile <lucile.audard@ecl20.ec-lyon.fr>
Date: Tue, 7 Nov 2023 13:53:01 +0100
Subject: [PATCH] knn.py: fix distance broadcasting and majority vote, plot accuracy vs k

---
 knn.py | 48 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/knn.py b/knn.py
index 54a3d58..2e11742 100644
--- a/knn.py
+++ b/knn.py
@@ -1,15 +1,20 @@
 import numpy as np
+from read_cifar import *
+import matplotlib.pyplot as plt
 
 
 def distance_matrix(mat1, mat2):
-    square1 = np.sum(np.square(mat1), axis = 1)
-    square2 = np.sum(np.square(mat2), axis = 1)
+    # Squared L2 norm of every row, kept as (n, 1) column vectors so they broadcast against prod
+    square1 = np.sum(np.square(mat1), axis = 1, keepdims=True)
+    square2 = np.sum(np.square(mat2), axis = 1, keepdims=True)
+    # Pairwise dot products between the rows of mat1 and the rows of mat2
     prod = np.dot(mat1, mat2.T)
-    dists = np.sqrt(square1 + square2 - 2 * prod)
+    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b; clamp rounding-induced negatives to 0 so sqrt never yields NaN
+    dists = np.sqrt(np.maximum(square1 + square2.T - 2 * prod, 0))
     return dists
 
 def knn_predict(dists, labels_train, k):
-    # results matrix initialisation
+    # predicted labels vector initialization (one entry per test image)
     predicted_labels = np.zeros(len(dists))
     # loop on all the test images
     for i in range(len(dists)):
@@ -19,17 +24,21 @@ def knn_predict(dists, labels_train, k):
         # get the matching labels_train
         closest_labels = labels_train[k_sorted_dists]
         # get the most common labels_train
-        predicted_labels[i] = np.argmax(closest_labels)
+        uniques, counts = np.unique(closest_labels, return_counts = True)
+        predicted_labels[i] = uniques[np.argmax(counts)]
     return np.array(predicted_labels)
 
 def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
     dists = distance_matrix(data_test, data_train)
+    # Number of test samples; this is the denominator of the accuracy
     tot = len(data_test)
     accurate = 0
     predicted_labels = knn_predict(dists, labels_train, k)
+    # Count the test samples whose predicted label equals the true label
     for i in range(tot):
         if predicted_labels[i] == labels_test[i]:
             accurate += 1
+    # Accuracy = correct predictions / number of test samples (ZeroDivisionError if data_test is empty)
     accuracy = accurate/tot
     return accuracy
 
@@ -42,14 +51,27 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
 
 if __name__ == "__main__":
 
-    bench_knn()
-    # data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
-    # X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
-    # print(evaluate_knn(X_train, y_train, X_test, y_test, 5))
-    # print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
+    data, labels = read_cifar("./data/cifar-10-batches-py")
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
+    # NOTE(review): the removed code unpacked split_dataset as (X_train, X_test, y_train, y_test) - confirm the return order used above
+    k_list = list(range(1, 21))
+    accuracy = [evaluate_knn(data_train, labels_train, data_test, labels_test, k) for k in k_list]
+    # Plot accuracy against k; savefig does not create "results/", so the directory must already exist
+    plt.plot(k_list, accuracy)
+    plt.title("Variation of k-nearest neighbors method accuracy for k from 1 to 20")
+    plt.xlabel("k value")
+    plt.ylabel("Accuracy")
+    plt.grid(True, which='both')
+    plt.savefig("results/knn.png")
+
 
-    # y_test = []
     # x_test = np.array([[1,2],[4,6]])
+    # x_labels_test = np.array([0,1])
     # x_train = np.array([[2,4],[7,2],[4,6]])
-    # y_train = [1,2,1]
-    # dist = distance_matrix(x_test,x_train)
+    # x_labels_train = np.array([0,1,1])
+
+    # dist = distance_matrix(x_test, x_train)
+    # accuracy = evaluate_knn(x_train, x_labels_train, x_test, x_labels_test, 1)
+    # print(accuracy)
+
+    
-- 
GitLab