Part 2 : KNN

a28545d9 · Cart Milan · 79ed1784 · a28545d9 · a28545d9
Commit a28545d9 authored 1 year ago by Cart Milan
--- a/__pycache__/read_cifar.cpython-311.pyc
+++ b/__pycache__/read_cifar.cpython-311.pyc
--- a/knn.py
+++ b/knn.py
+import read_cifar
+import numpy as np
+import matplotlib.pyplot as plt
def distance_matrix(matrix1, matrix2):
    """Return the pairwise Euclidean distances between the rows of two matrices.

    The distances are obtained from the expansion
    ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so that the heavy work is a
    single matrix product instead of an explicit double loop.

    Args:
        matrix1: 2-D array-like with one sample per row (callers in this file
            pass the test set here).
        matrix2: 2-D array-like with one sample per row and the same number of
            columns as ``matrix1`` (callers pass the training set here).

    Returns:
        Array of shape ``(len(matrix1), len(matrix2))`` whose entry ``[i, j]``
        is the Euclidean distance between ``matrix1[i]`` and ``matrix2[j]``.
    """
    a = np.asarray(matrix1)
    b = np.asarray(matrix2)
    # Integer inputs (e.g. raw uint8 pixels) would wrap around in np.square
    # and np.dot; promote them to float first. Float inputs keep their dtype.
    if not np.issubdtype(a.dtype, np.floating):
        a = a.astype(np.float64)
    if not np.issubdtype(b.dtype, np.floating):
        b = b.astype(np.float64)

    sq_norms_a = np.sum(np.square(a), axis=1, keepdims=True)  # ||a||^2, column
    sq_norms_b = np.sum(np.square(b), axis=1, keepdims=True)  # ||b||^2, column
    squared = sq_norms_a + sq_norms_b.T - 2 * np.dot(a, b.T)

    # Rounding can leave tiny negative values for (nearly) identical rows;
    # clamp them so the square root never yields NaN.
    np.maximum(squared, 0, out=squared)
    return np.sqrt(squared)
def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote of its k nearest neighbours.

    Args:
        dists: 2-D array-like; row ``i`` holds the distances from test sample
            ``i`` to every training sample.
        labels_train: non-negative integer class labels of the training
            samples, in the same order as the columns of ``dists``.
        k: number of nearest neighbours that vote (must be at least 1).

    Returns:
        1-D array with one predicted label per row of ``dists``. When several
        classes tie for the most votes, the smallest label wins.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train, dtype=np.intp).ravel()
    # Size the vote table from the labels actually present instead of
    # assuming exactly 10 classes.
    n_classes = int(labels_train.max()) + 1 if labels_train.size else 1

    predictions = []
    for row in dists:
        nearest = np.argsort(row)[:k]
        votes = np.bincount(labels_train[nearest], minlength=n_classes)
        # argmax returns the first maximum, i.e. the smallest label on a tie.
        predictions.append(np.argmax(votes))
    return np.array(predictions)
def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
    """Return the classification accuracy of k-NN on a test set.

    Args:
        data_train: training samples, one per row.
        labels_train: labels of the training samples.
        data_test: test samples, one per row.
        labels_tests: true labels of the test samples (any array-like; a
            column vector is flattened).
        k: number of neighbours used for the vote.

    Returns:
        Fraction of test samples whose predicted label equals the true label.
    """
    # Flatten so an (N, 1) label column cannot broadcast against the (N,)
    # predictions into an (N, N) comparison; also accepts plain lists.
    expected = np.asarray(labels_tests).ravel()

    dists = distance_matrix(data_test, data_train)
    predicted = knn_predict(dists, labels_train, k)
    return np.mean(expected == predicted)
def bench_knn(
    data_dir='/Users/milancart/Documents/GitHub/image-classification/Data/cifar-10-batches-py',
    figure_path='/Users/milancart/Documents/GitHub/image-classification/result/knn.png',
):
    """Plot k-NN accuracy for the odd values of k from 1 to 19.

    Loads the dataset through ``read_cifar``, splits it with a 0.9 ratio,
    measures the accuracy for each k, then saves and displays the curve.

    Args:
        data_dir: directory passed to ``read_cifar.read_cifar``.
        figure_path: file the accuracy plot is written to.
    """
    k_indices = list(range(1, 20, 2))

    # Load data
    data, labels = read_cifar.read_cifar(data_dir)
    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)

    # The distance matrix does not depend on k, so compute it once rather
    # than once per k.
    dists = distance_matrix(X_test, X_train)
    accuracies = []
    for k in k_indices:
        predictions = knn_predict(dists, y_train, k)
        accuracies.append((y_test == predictions).sum() / y_test.shape[0])

    plt.figure(figsize=(8, 6))
    plt.xlabel('K')
    plt.ylabel('Accuracy')
    # A labelled line gives plt.legend() something to display.
    plt.plot(k_indices, accuracies, label='Accuracy')
    plt.title("Accuracy as function of k")
    plt.legend()
    # Save before show(): once the window is closed the current figure is
    # gone and savefig() would write an empty image.
    plt.savefig(figure_path)
    plt.show()
if __name__ == "__main__":
    DATA_DIR = '/Users/milancart/Documents/GitHub/image-classification/Data/cifar-10-batches-py'

    # Accuracy-versus-k benchmark (saves and shows the plot).
    bench_knn()

    # Single evaluation with k = 5, using the same 0.9 split ratio.
    data, labels = read_cifar.read_cifar(DATA_DIR)
    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
    print(evaluate_knn(X_train, y_train, X_test, y_test, 5))
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

    # Toy sanity check of distance_matrix on hand-checkable points; print the
    # result so it can actually be inspected instead of being discarded.
    x_test = np.array([[1, 2], [4, 6]])
    x_train = np.array([[2, 4], [7, 2], [4, 6]])
    print(distance_matrix(x_test, x_train))
\ No newline at end of file