Skip to content
Snippets Groups Projects
Commit 29db5f9b authored by pierre-cau
Browse files

knn

parent 00d6cf8f
No related branches found
No related tags found
No related merge requests found
......@@ -15,11 +15,9 @@ if __name__ == "__main__":
print(f" - Training data shape: {data_train.shape}, Training labels shape: {labels_train.shape}")
print(f" - Testing data shape: {data_test.shape}, Testing labels shape: {labels_test.shape}")
# We flatten the images
data_train = data_train.reshape(data_train.shape[0], -1)
data_test = data_test.reshape(data_test.shape[0], -1)
# Evaluate the k-NN algorithm
# # Evaluate the k-NN algorithm
k = 3
accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
\ No newline at end of file
No preview for this file type
......@@ -16,9 +16,11 @@ def distance_matrix(matrix1, matrix2):
dists : np.ndarray
L2 Euclidean distance matrix of shape (n1, n2).
"""
assert matrix1.shape[1] == matrix2.shape[1], "Matrices must have the same number of columns"
# Compute the squared sum of each row in both matrices
matrix1_squared = np.sum(np.square(matrix1), axis=1, keepdims=True)
matrix2_squared = np.sum(np.square(matrix2), axis=1, keepdims=True).T
matrix1_squared = np.sum(matrix1**2, axis=1, keepdims=True)
matrix2_squared = np.sum(matrix2**2, axis=1, keepdims=True).T
# Compute the dot product between the two matrices
dot_product = np.dot(matrix1, matrix2.T)
......@@ -49,25 +51,19 @@ def knn_predict(dists, labels_train, k):
n_test = dists.shape[0]
labels_pred = np.empty(n_test, dtype=labels_train.dtype)
for i in range(n_test):
# Find the indices of the k nearest neighbors
sorted_indices = np.argsort(dists[i])
if len(sorted_indices) < k:
k = len(sorted_indices)
print(f"Warning: k is too large, reducing it to {k} as a maximum value.")
nearest_neighbors = sorted_indices[:k]
# Find the indices of the k nearest neighbors for each test point
sorted_indices = np.argsort(dists, axis=1)
nearest_neighbors = sorted_indices[:, :k]
# Retrieve the labels of the k nearest neighbors
nearest_labels = labels_train[nearest_neighbors]
# Determine the most common label among the k nearest neighbors
labels_pred[i] = np.bincount(nearest_labels).argmax()
# Determine the most common label among the k nearest neighbors for each test point
labels_pred = np.array([np.bincount(nearest_labels[i]).argmax() for i in range(nearest_labels.shape[0])])
return labels_pred
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
def evaluate_knn(data_train, labels_train, data_test, labels_test, k, dists=None):
"""
Evaluate the k-nearest neighbors algorithm on the given dataset.
......@@ -83,6 +79,8 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
Testing labels of shape (n_test,).
k : int
Number of neighbors to consider.
dists : np.ndarray, optional
Distance matrix of shape (n_test, n_train) between the test set and the train set.
Returns
-------
......@@ -91,7 +89,7 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
"""
# Compute the distance matrix between the testing and training data
dists = distance_matrix(data_test, data_train)
print("Distance matrix made successfully")
# Predict the labels for the test set
labels_pred = knn_predict(dists, labels_train, k)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment