From 72bb7ee1d99b7365de5ee90770132b7a6b5fb9ae Mon Sep 17 00:00:00 2001 From: lucile <lucile.audard@ecl20.ec-lyon.fr> Date: Tue, 7 Nov 2023 13:53:01 +0100 Subject: [PATCH] Update knn.py --- knn.py | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/knn.py b/knn.py index 54a3d58..2e11742 100644 --- a/knn.py +++ b/knn.py @@ -1,15 +1,20 @@ import numpy as np +from read_cifar import * +import matplotlib.pyplot as plt def distance_matrix(mat1, mat2): - square1 = np.sum(np.square(mat1), axis = 1) - square2 = np.sum(np.square(mat2), axis = 1) + # A^2 and B^2 + square1 = np.sum(np.square(mat1), axis = 1, keepdims=True) + square2 = np.sum(np.square(mat2), axis = 1, keepdims=True) + # A*B prod = np.dot(mat1, mat2.T) - dists = np.sqrt(square1 + square2 - 2 * prod) + # A^2 + B^2 -2*A*B + dists = np.sqrt(square1 + square2.T - 2 * prod) return dists def knn_predict(dists, labels_train, k): - # results matrix initialisation + # results matrix initialization predicted_labels = np.zeros(len(dists)) # loop on all the test images for i in range(len(dists)): @@ -19,17 +24,21 @@ def knn_predict(dists, labels_train, k): # get the matching labels_train closest_labels = labels_train[k_sorted_dists] # get the most common labels_train - predicted_labels[i] = np.argmax(closest_labels) + uniques, counts = np.unique(closest_labels, return_counts = True) + predicted_labels[i] = uniques[np.argmax(counts)] return np.array(predicted_labels) def evaluate_knn(data_train, labels_train, data_test, labels_test, k): dists = distance_matrix(data_test, data_train) + # Determine the number of images in data_test tot = len(data_test) accurate = 0 predicted_labels = knn_predict(dists, labels_train, k) + # Count the number of images in data_test whose label has been estimated correctly for i in range(tot): if predicted_labels[i] == labels_test[i]: accurate += 1 + # Calculate the classification rate accuracy = accurate/tot return accuracy @@ -42,14 +51,27 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k): if __name__ == "__main__": - bench_knn() - # data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py') - # X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9) - # print(evaluate_knn(X_train, y_train, X_test, y_test, 5)) - # print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) + data, labels = read_cifar("./data/cifar-10-batches-py") + data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9) + + k_list = [k for k in range(1, 21)] + accuracy = [evaluate_knn(data_train, labels_train, data_test, labels_test, k) for k in range (1, 21)] + + plt.plot([k for k in range (1, 21)], accuracy) + plt.title("Variation of k-nearest neighbors method accuracy for k from 1 to 20") + plt.xlabel("k value") + plt.ylabel("Accuracy") + plt.grid(True, which='both') + plt.savefig("results/knn.png") + - # y_test = [] # x_test = np.array([[1,2],[4,6]]) + # x_labels_test = np.array([0,1]) # x_train = np.array([[2,4],[7,2],[4,6]]) - # y_train = [1,2,1] - # dist = distance_matrix(x_test,x_train) + # x_labels_train = np.array([0,1,1]) + + # dist = distance_matrix(x_test, x_train) + # accuracy = evaluate_knn(x_train, x_labels_train, x_test, x_labels_test, 1) + # print(accuracy) + + -- GitLab