diff --git a/__pycache__/read_cifar.cpython-311.pyc b/__pycache__/read_cifar.cpython-311.pyc index 3c4534351e5e03b53d76e9087a1ffc193f08cd06..3d053de8f665020f45e54022379f58efabed47fd 100644 Binary files a/__pycache__/read_cifar.cpython-311.pyc and b/__pycache__/read_cifar.cpython-311.pyc differ diff --git a/knn.py b/knn.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0a687fbb47649384585c7715d635f1b5484a7474 100644 --- a/knn.py +++ b/knn.py @@ -0,0 +1,80 @@ +import read_cifar +import numpy as np +import matplotlib.pyplot as plt + +def distance_matrix(matrix1, matrix2): + #X_test then X_train in this order + sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True) #A^2 + sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True) #B^2 + + dot_product = np.dot(matrix1, matrix2.T) # A * B (matrix mutliplication) + + dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product) # Compute the product + return dists + +def knn_predict(dists, labels_train, k): + output = [] + # Loop on all the images_test + for i in range(len(dists)): + # Innitialize table to store the neighbors + res = [0] * 10 + # Get the closest neighbors + labels_close = np.argsort(dists[i])[:k] + for label in labels_close: + #add a label to the table of result + res[labels_train[label]] += 1 + # Get the class with the maximum neighbors + label_temp = np.argmax(res) #Careful to the logic here, if there is two or more maximum, the function the first maximum encountered + output.append(label_temp) + return(np.array(output)) + +def evaluate_knn(data_train, labels_train, data_test, labels_tests, k): + dist = distance_matrix(data_test, data_train) + result_test = knn_predict(dist, labels_train, k) + + #accuracy + N = labels_tests.shape[0] + accuracy = (labels_tests == result_test).sum() / N + return(accuracy) + +def bench_knn() : + + k_indices = [i for i in range(20) if i % 2 != 0] + accuracies = [] + + # Load data + data, labels = read_cifar.read_cifar('/Users/milancart/Documents/GitHub/image-classification/Data/cifar-10-batches-py') + X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9) + #Load one batch + # data, labels = read_cifar.read_cifar_batch('image-classification/data/cifar-10-batches-py/data_batch_1') + # X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9) + + # Loop on the k_indices to get all the accuracies + for k in k_indices : + accuracy = evaluate_knn(X_train, y_train, X_test, y_test, k) + accuracies.append(accuracy) + + # Save and show the graph of accuracies + plt.figure(figsize=(8, 6)) + plt.xlabel('K') + plt.ylabel('Accuracy') + plt.plot(k_indices, accuracies) + plt.title("Accuracy as function of k") + plt.legend() + plt.show() + plt.savefig('/Users/milancart/Documents/GitHub/image-classification/result/knn.png') + + +if __name__ == "__main__": + print('milan') + bench_knn() + data, labels = read_cifar.read_cifar('/Users/milancart/Documents/GitHub/image-classification/Data/cifar-10-batches-py') + X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9) + print(evaluate_knn(X_train, y_train, X_test, y_test, 5)) + print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) + + y_test = [] + x_test = np.array([[1,2],[4,6]]) + x_train = np.array([[2,4],[7,2],[4,6]]) + y_train = [1,2,1] + dist = distance_matrix(x_test,x_train) \ No newline at end of file