Skip to content
Snippets Groups Projects
Commit 1c03774b authored by oscarchaufour's avatar oscarchaufour
Browse files

knn

knn without test on cifar data
parent 9206be84
No related branches found
No related tags found
No related merge requests found
...@@ -4,4 +4,72 @@ Created on Fri Oct 20 17:39:37 2023 ...@@ -4,4 +4,72 @@ Created on Fri Oct 20 17:39:37 2023
@author: oscar @author: oscar
""" """
import read_cifar
import numpy as np
import statistics
from statistics import mode
def distance_matrix(A, B):
    """Return the pairwise Euclidean distances between the rows of A and B.

    Uses the expansion ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so the
    whole matrix is computed with one matrix product instead of a double loop.

    Parameters
    ----------
    A : array-like of shape (n_a, d)
        One sample per row.
    B : array-like of shape (n_b, d)
        One sample per row, same feature count as ``A``.

    Returns
    -------
    numpy.ndarray of shape (n_a, n_b)
        ``dists[i, j]`` is the Euclidean distance between ``A[i]`` and ``B[j]``.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    # Integer inputs (e.g. uint8 pixels) would overflow when squared, so
    # promote them to float; floating inputs keep their own precision.
    if not np.issubdtype(A.dtype, np.floating):
        A = A.astype(np.float64)
    if not np.issubdtype(B.dtype, np.floating):
        B = B.astype(np.float64)

    # Squared norms: a column vector for A and a row vector for B, so that
    # their sum broadcasts to the full (n_a, n_b) grid.
    sum_of_squares_a = np.sum(np.square(A), axis=1)[:, np.newaxis]
    sum_of_squares_b = np.sum(np.square(B), axis=1)[np.newaxis, :]

    # Cross term: dot product between every row of A and every row of B.
    dot_product = np.dot(A, B.T)

    squared_dists = sum_of_squares_a + sum_of_squares_b - 2 * dot_product
    # Rounding can push tiny true-zero distances slightly below zero, which
    # would turn into NaN under the square root.
    np.maximum(squared_dists, 0, out=squared_dists)
    return np.sqrt(squared_dists)
def knn_predict(dists, labels_train, k):
    """Predict a label for every row of ``dists`` by k-nearest-neighbour vote.

    Parameters
    ----------
    dists : 2-D numpy array
        ``dists[i, j]`` is the distance between test sample ``i`` and
        training sample ``j``.
    labels_train : numpy array
        Label of each training sample, indexed like the columns of ``dists``.
    k : int
        Number of closest training samples that take part in the vote.

    Returns
    -------
    numpy.ndarray
        One predicted label per test sample, stored in a float array.
        The winning label is whatever ``statistics.mode`` returns for the
        k neighbour labels.
    """
    number_test, _ = dists.shape
    # Initialize the predicted labels to zeros
    labels_predicted = np.zeros(number_test)
    for row, row_dists in enumerate(dists):
        # Indices of the k training samples closest to this test sample
        nearest = np.argsort(row_dists)[:k]
        labels_predicted[row] = mode(labels_train[nearest])
    return labels_predicted
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the fraction of test samples that k-NN classifies correctly.

    Distances are computed from each test sample to each training sample,
    labels are predicted with ``knn_predict``, and the predictions are
    compared element-wise with ``labels_test``.
    """
    predictions = knn_predict(
        distance_matrix(data_test, data_train), labels_train, k
    )
    hits = np.sum(labels_test == predictions)
    return hits / labels_test.shape[0]
if __name__ == "__main__":
    # Smoke test on a tiny hand-made dataset: two training points, one test
    # point, and a single neighbour.
    toy_train = np.array([[1, 27], [100, 300]])
    toy_train_labels = np.array([0.002, 9000])
    toy_test = np.array([[25, 350]])
    toy_test_labels = np.array([9000])

    classification_rate = evaluate_knn(
        toy_train, toy_train_labels, toy_test, toy_test_labels, 1
    )
    print("Classification rate:")
    print(classification_rate)

    # Disabled example: call knn_predict directly on a hand-written
    # distance matrix.
    # dists = np.array([[1000, 2, 3],
    #                   [4, 0.1, 6],
    #                   [7, 8, 0]])
    # labels_train = np.array([0, 1, 5])
    # k = 2
    # predicted_labels = knn_predict(dists, labels_train, k)

    # Disabled example: run on the CIFAR data (not exercised yet).
    # file = "./data/cifar-10-python/"
    # data, labels = read_cifar.read_cifar(file)
    # data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.8)
    # dists = distance_matrix(data_train, data_test)
    # k = 2
    # knn_predict(dists, labels_train, k)
\ No newline at end of file
...@@ -60,4 +60,4 @@ def split_dataset(data, labels, split) : ...@@ -60,4 +60,4 @@ def split_dataset(data, labels, split) :
if __name__ == "__main__": if __name__ == "__main__":
file = "./data/cifar-10-python/" file = "./data/cifar-10-python/"
data, labels = read_cifar(file) data, labels = read_cifar(file)
res = split_dataset(data, labels, 0.8) data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment