# knn.py (3.05 KiB)
# Forked from Vuillemot Romain / INF-TC1 (source project has limited visibility).
# Authored by Romain Vuillemot.
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 20 17:39:37 2023
@author: oscar
"""
import read_cifar
import numpy as np
import statistics
from statistics import mode
import time
import matplotlib.pyplot as plt
def distance_matrix(A, B):
    """Return the Euclidean distances between every row of A and every row of B.

    The computation relies on the expansion
    ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so that the whole matrix is
    obtained from a single matrix product.

    Parameters
    ----------
    A : array-like of shape (n_a, d)
        First set of row vectors.
    B : array-like of shape (n_b, d)
        Second set of row vectors.

    Returns
    -------
    numpy.ndarray of shape (n_a, n_b)
        Entry ``[i, j]`` is the Euclidean distance between ``A[i]`` and ``B[j]``.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    # Integer inputs (e.g. uint8 pixels) would wrap around when squared or
    # multiplied, so promote anything that is not already floating point.
    if not np.issubdtype(A.dtype, np.floating):
        A = A.astype(np.float64)
    if not np.issubdtype(B.dtype, np.floating):
        B = B.astype(np.float64)

    # (n_a, 1) column and (n_b,) row broadcast together to (n_a, n_b).
    sum_of_squares_a = np.sum(A ** 2, axis=1, keepdims=True)
    sum_of_squares_b = np.sum(B ** 2, axis=1)
    dot_product = A @ B.T

    squared_dists = sum_of_squares_a + sum_of_squares_b - 2 * dot_product
    # Rounding errors can leave tiny negative values for (near-)identical
    # rows; clamp them so the square root never yields NaN.
    np.maximum(squared_dists, 0, out=squared_dists)
    return np.sqrt(squared_dists)
def knn_predict(dists, labels_train, k):
    """Predict a label for each test sample by voting among its k nearest neighbours.

    Parameters
    ----------
    dists : numpy.ndarray of shape (number_train, number_test)
        ``dists[i, j]`` is the distance between training sample ``i`` and
        test sample ``j``.
    labels_train : array-like of shape (number_train,)
        Label of each training sample.
    k : int
        Number of nearest neighbours taking part in the vote (at least 1).

    Returns
    -------
    numpy.ndarray of shape (number_test,)
        Predicted label of each test sample, stored as floats.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.

    Notes
    -----
    The vote uses ``statistics.mode``; on Python 3.8+ a tie is resolved in
    favour of the label met first among the neighbours ordered by increasing
    distance.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    labels_train = np.asarray(labels_train)
    _, number_test = dists.shape

    # Initialize the predicted labels to zeros.
    labels_predicted = np.zeros(number_test)
    for j in range(number_test):
        # Training indices ordered from closest to farthest for test sample j.
        knn_indices = np.argsort(dists[:, j])[:k]
        labels_predicted[j] = mode(labels_train[knn_indices])
    return labels_predicted
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the fraction of test samples whose k-NN prediction equals its label.

    Distances between the training and test samples are computed with
    ``distance_matrix``, labels are predicted with ``knn_predict``, and the
    number of predictions equal to ``labels_test`` is divided by the number
    of test labels (``labels_test.shape[0]``).
    """
    predictions = knn_predict(
        distance_matrix(data_train, data_test), labels_train, k
    )
    correct = np.sum(labels_test == predictions)
    return correct / labels_test.shape[0]
if __name__ == "__main__":
    # Time the whole run, data loading included.
    start = time.time()

    # Disabled toy example (distance matrix, training labels and k):
    # dists = np.array([[1000, 2, 3],
    #                   [4, 0.1, 6],
    #                   [7, 8, 0]])
    # labels_train = np.array([0, 1, 5])
    # k = 2
    # predicted_labels = knn_predict(dists, labels_train, k)
    # classification_rate = evaluate_knn(np.array([[1, 27], [100, 300]]), np.array([0.002, 9000]), np.array([[25, 350]]), np.array([9000]), 1)
    # print("Classification rate:")
    # print(classification_rate)

    dataset_dir = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(dataset_dir)
    # 0.9 is forwarded to split_dataset as its split argument
    # (presumably the training fraction -- confirm in read_cifar).
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
        data, labels, 0.9
    )
    k = 10

    # Report the number of samples, then the length of the first sample,
    # for the training and test sets.
    for subset in (data_train, data_test):
        print(len(subset))
    for subset in (data_train, data_test):
        print(len(subset[0]))

    classification_rate = evaluate_knn(
        data_train, labels_train, data_test, labels_test, k
    )
    print("classification rate :", classification_rate)
    # Disabled: plot_accuracy(data_train, labels_train, data_test, labels_test, 4)

    elapsed = time.time() - start
    print('run time (second): ')
    print(elapsed)