Skip to content
Snippets Groups Projects
Commit 6444c4c5 authored by Danjou Pierre's avatar Danjou Pierre
Browse files

knn finito

parent f2295c1f
No related branches found
No related tags found
No related merge requests found
...@@ -162,3 +162,4 @@ cython_debug/ ...@@ -162,3 +162,4 @@ cython_debug/
#.idea/ #.idea/
/data /data
\data
# -*- coding: utf-8 -*- import numpy as np
from read_cifar import *
import matplotlib.pyplot as plt
import numpy as np
def distance_matrix(A, B):
    """
    Compute the L2 (Euclidean) distance matrix between the rows of A and B.

    The squared distances are obtained from the expansion
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, so the whole matrix comes from
    a single matrix product instead of an explicit double loop.

    Parameters:
        A (array-like): Matrix of shape (m, n).
        B (array-like): Matrix of shape (p, n).

    Returns:
        numpy.ndarray: Distance matrix of shape (m, p) where the element
        (i, j) is the Euclidean distance between A[i] and B[j].
    """
    A = np.asarray(A)
    B = np.asarray(B)

    # Integer inputs (e.g. raw uint8 pixel values) silently wrap around when
    # squared or multiplied, which yields wrong distances. Promote anything
    # that is not already floating point; float inputs keep their dtype.
    if not np.issubdtype(A.dtype, np.floating):
        A = A.astype(np.float64)
    if not np.issubdtype(B.dtype, np.floating):
        B = B.astype(np.float64)

    # Squared norms of each row, shaped so that they broadcast to (m, p).
    A_squared = np.sum(A ** 2, axis=1).reshape(-1, 1)  # Shape (m, 1)
    B_squared = np.sum(B ** 2, axis=1).reshape(1, -1)  # Shape (1, p)

    dists_squared = A_squared + B_squared - 2 * np.dot(A, B.T)

    # Rounding errors can push tiny squared distances slightly below zero;
    # clamp them before taking the square root to avoid NaNs.
    return np.sqrt(np.maximum(dists_squared, 0))
def knn_predict(dists, labels_train, k):
    """
    Predict a label for every row of a distance matrix by k-nearest-neighbour vote.

    Parameters:
        dists (array-like): Matrix of shape (n_test, n_train); row i holds the
            distances from test sample i to every training sample.
        labels_train (array-like): Non-negative integer labels of the
            training samples, indexed like the columns of ``dists``.
        k (int): Number of nearest neighbours that take part in the vote.

    Returns:
        numpy.ndarray: One predicted label per row of ``dists``. When several
        labels collect the same number of votes, the smallest label wins.
    """
    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train)

    # Size the vote table from the labels actually present instead of
    # assuming exactly 10 classes, so labels >= 10 no longer raise IndexError.
    n_classes = int(labels_train.max()) + 1 if labels_train.size else 1

    predictions = []
    for row in dists:
        # Indices of the k closest training samples for this test sample.
        nearest = np.argsort(row)[:k]
        votes = np.bincount(labels_train[nearest], minlength=n_classes)
        # argmax returns the first maximum, i.e. the lowest label on a tie.
        predictions.append(votes.argmax())
    return np.array(predictions, dtype=np.int64)
def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
    """
    Return the classification accuracy of k-NN on a test set.

    The distances from every test sample to every training sample are
    computed with ``distance_matrix``, labels are predicted with
    ``knn_predict``, and the predictions are compared with ``labels_tests``.

    Parameters:
        data_train: Training samples, one per row.
        labels_train: Labels of the training samples.
        data_test: Test samples, one per row.
        labels_tests: Ground-truth labels of the test samples (must expose
            ``.shape``).
        k (int): Number of neighbours used for the vote.

    Returns:
        Number of correct predictions divided by ``labels_tests.shape[0]``.
    """
    predicted = knn_predict(distance_matrix(data_test, data_train), labels_train, k)
    n_samples = labels_tests.shape[0]
    n_correct = (labels_tests == predicted).sum()
    return n_correct / n_samples
# def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
# dists = distance_matrix(data_test, data_train)
# # Determine the number of images in data_test
# tot = len(data_test)
# accurate = 0
# predicted_labels = knn_predict(dists, labels_train, k)
# # Count the number of images in data_test whose label has been estimated correctly
# for i in range(tot):
# if predicted_labels[i] == labels_test[i]:
# accurate += 1
# # Calculate the classification rate
# accuracy = accurate/tot
# return accuracy
if __name__ == "__main__":
    import os

    # Build the dataset location with os.path.join so the script does not
    # depend on the Windows path separator.
    main_path = os.path.join("data", "cifar-10-batches-py")
    k = 10

    data, labels = read_cifar(main_path)
    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
    print(labels_test)

    # Compute the distance matrix and the predictions once, and derive the
    # accuracy from them rather than recomputing both a second time.
    dists = distance_matrix(data_test, data_train)
    predictions = knn_predict(dists, labels_train, k)
    accuracy = (labels_test == predictions).sum() / labels_test.shape[0]

    print(predictions)
    print(accuracy)
# data, labels = read_cifar('data\cifar-10-batches-py')
# data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
# k=3
# accurancies = []
# accurancy = evaluate_knn(data_train, data_test, labels_train, labels_test, k)
# accurancies.append(accurancy)
# print(accurancies)
\ No newline at end of file
...@@ -7,7 +7,6 @@ Created on Thu Nov 7 08:45:09 2024 ...@@ -7,7 +7,6 @@ Created on Thu Nov 7 08:45:09 2024
import numpy as np import numpy as np
import pickle import pickle
from sklearn.model_selection import train_test_split
...@@ -59,16 +58,16 @@ def split_dataset(data, labels, split): ...@@ -59,16 +58,16 @@ def split_dataset(data, labels, split):
data_train, data_test = data[train_idx,:].astype(np.float32), data[test_idx,:].astype(np.float32) data_train, data_test = data[train_idx,:].astype(np.float32), data[test_idx,:].astype(np.float32)
labels_train, labels_test = labels[train_idx].astype(np.int64), labels[test_idx].astype(np.int64) labels_train, labels_test = labels[train_idx].astype(np.int64), labels[test_idx].astype(np.int64)
return (data_train, data_test, labels_train, labels_test) return data_train, data_test, labels_train, labels_test
if __name__ == "__main__":
    import os

    # Build the paths with os.path.join so the script does not depend on the
    # Windows path separator.
    main_path = os.path.join("data", "cifar-10-batches-py")
    path = os.path.join(main_path, "data_batch_1")

    # Load a single batch, then the whole dataset; distinct names keep the
    # first result from being overwritten by the second.
    batch_data, batch_labels = read_cifar_batch(path)
    data, labels = read_cifar(main_path)

    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
    # Uncomment to inspect the shapes of the resulting split:
    # print(data_train.shape, labels_train.shape, data_test.shape, labels_test.shape)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment