knn

knn without test on cifar data

knn
1c03774b · oscarchaufour · 9206be84 · 1c03774b · 1c03774b
Commit 1c03774b authored 1 year ago by oscarchaufour
--- a/knn.py
+++ b/knn.py
@@ -4,4 +4,72 @@ Created on Fri Oct 20 17:39:37 2023
 @author: oscar
 """
+import read_cifar
+import numpy as np
+import statistics
+from statistics import mode
+def distance_matrix(A, B):
+    """Compute the pairwise Euclidean distances between the rows of A and B.
+
+    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, so the whole
+    matrix comes from a single matrix product instead of a double loop.
+
+    Parameters
+    ----------
+    A : array-like of shape (n, d)
+    B : array-like of shape (m, d)
+
+    Returns
+    -------
+    numpy.ndarray of shape (n, m)
+        dists[i, j] is the Euclidean distance between A[i] and B[j].
+    """
+    A = np.asarray(A)
+    B = np.asarray(B)
+    # Integer inputs can overflow when squared; promote them to float first.
+    if not np.issubdtype(A.dtype, np.floating):
+        A = A.astype(np.float64)
+    if not np.issubdtype(B.dtype, np.floating):
+        B = B.astype(np.float64)
+    # Column vector for A and row vector for B so the sum broadcasts to (n, m).
+    sum_of_squaresA = np.sum(np.square(A), axis=1)[:, np.newaxis]
+    sum_of_squaresB = np.sum(np.square(B), axis=1)[np.newaxis, :]
+    dot_product = np.dot(A, B.T)
+    squared_dists = sum_of_squaresA + sum_of_squaresB - 2 * dot_product
+    # Round-off can push a true zero slightly negative; clamp so sqrt never gives NaN.
+    np.maximum(squared_dists, 0, out=squared_dists)
+    return np.sqrt(squared_dists)
+def knn_predict(dists, labels_train, k):
+    """Predict a label for each test sample by majority vote of its k nearest neighbours.
+
+    Parameters
+    ----------
+    dists : numpy.ndarray of shape (number_test, number_train)
+        dists[i, j] is the distance from test sample i to training sample j.
+    labels_train : array-like of shape (number_train,)
+        Label of each training sample.
+    k : int
+        Number of neighbours that vote; values above number_train use every
+        training sample.
+
+    Returns
+    -------
+    numpy.ndarray of shape (number_test,)
+        Predicted labels, stored with the dtype of labels_train.
+
+    Raises
+    ------
+    ValueError
+        If k is smaller than 1.
+    """
+    if k < 1:
+        raise ValueError("k must be at least 1")
+    labels_train = np.asarray(labels_train)
+    # Keep the label dtype so integer class ids are not silently turned into floats.
+    labels_predicted = np.empty(dists.shape[0], dtype=labels_train.dtype)
+    for i, row in enumerate(dists):
+        # A stable sort keeps equal distances in training order, so the
+        # selected neighbours are deterministic.
+        knn_indices = np.argsort(row, kind="stable")[:k]
+        # On a tied vote, mode() (Python >= 3.8) returns the first label it
+        # meets, i.e. the label of the closest neighbour among the tied ones.
+        labels_predicted[i] = mode(labels_train[knn_indices])
+    return labels_predicted
+def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
+    """Return the fraction of test samples whose k-NN prediction equals labels_test.
+
+    Distances are computed from every row of data_test to every row of
+    data_train with distance_matrix, labels are predicted with knn_predict,
+    and the number of matches is divided by labels_test.shape[0].
+    """
+    test_to_train = distance_matrix(data_test, data_train)
+    predictions = knn_predict(test_to_train, labels_train, k)
+    # Element-wise comparison; summing the booleans counts the correct predictions.
+    hits = np.sum(labels_test == predictions)
+    return hits / labels_test.shape[0]
+if __name__ == "__main__" :
+    # Tiny hand-made sanity check: two training points, one test point, k = 1.
+    toy_train = np.array([[1, 27], [100, 300]])
+    toy_train_labels = np.array([0.002, 9000])
+    toy_test = np.array([[25, 350]])
+    toy_test_labels = np.array([9000])
+    classification_rate = evaluate_knn(toy_train, toy_train_labels, toy_test, toy_test_labels, 1)
+    print("Classification rate:")
+    print(classification_rate)
+    # CIFAR run (still disabled): load with read_cifar.read_cifar("./data/cifar-10-python/"),
+    # split with read_cifar.split_dataset(data, labels, 0.8), then call
+    # evaluate_knn(data_train, labels_train, data_test, labels_test, k).
\ No newline at end of file
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -60,4 +60,4 @@ def split_dataset(data, labels, split) :
 if __name__ == "__main__":
    file = "./data/cifar-10-python/"
    data, labels = read_cifar(file)
-    res = split_dataset(data, labels, 0.8)
+    # Unpack the result of split_dataset into its four parts instead of keeping a single `res`.
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)