Select Git revision
Forked from
Derrode Stéphane / INF-TC2
Source project has a limited visibility.
-
Derrode Stéphane authored
knn.py 2.23 KiB
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# Create distance matrix
def distance_matrix(X, Y):
    """Return the pairwise L2 (Euclidean) distance matrix between rows of X and Y.

    The computation is done solely through matrix manipulations, using the
    expansion ``||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x.y``.

    Arguments:
    - X: 2-D array-like with one sample per row.
    - Y: 2-D array-like with one sample per row and the same number of
      columns as X.
    Returns:
    - dists: array with one row per row of X and one column per row of Y,
      where ``dists[i, j]`` is the Euclidean distance between ``X[i]`` and
      ``Y[j]``.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    # Integer inputs (e.g. uint8 pixel values) would overflow when squared,
    # so promote anything that is not already floating point.
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(np.float64)
    if not np.issubdtype(Y.dtype, np.floating):
        Y = Y.astype(np.float64)

    x_sq = np.sum(X ** 2, axis=1, keepdims=True)  # column of squared norms
    y_sq = np.sum(Y ** 2, axis=1)                 # broadcasts as a row
    sq_dists = x_sq + y_sq - 2 * (X @ Y.T)

    # Round-off can leave tiny negative values for (near-)identical rows;
    # clamp them so the square root never yields NaN.
    np.maximum(sq_dists, 0, out=sq_dists)
    return np.sqrt(sq_dists)
# KNN predict
def knn_predict(dists, labels_train, k):
    """Predict a label for each test sample by majority vote of its k nearest neighbours.

    Arguments:
    - dists: 2-D distance matrix with one row per test sample and one column
      per training sample.
    - labels_train: training labels, one per column of dists. Any label type
      that ``np.unique`` can sort is accepted (integers, including negative
      ones, strings, ...).
    - k: the number of neighbours (>= 1). If k exceeds the number of training
      samples, all of them vote.
    Returns:
    - 1-D array with the predicted label of each row of dists. Integer and
      boolean labels are returned as int64. When several classes tie, the
      smallest label wins.
    Raises:
    - ValueError: if dists is not 2-D, if its column count differs from the
      number of training labels, if there are no training labels, or if
      k < 1.
    """
    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train).ravel()

    if dists.ndim != 2:
        raise ValueError("dists must be a 2-D (n_test, n_train) matrix")
    if labels_train.size == 0:
        raise ValueError("labels_train must not be empty")
    if dists.shape[1] != labels_train.shape[0]:
        raise ValueError(
            "dists has %d columns but labels_train has %d entries"
            % (dists.shape[1], labels_train.shape[0])
        )
    if k < 1:
        raise ValueError("k must be at least 1")

    # Encode labels as 0..n_classes-1 so that bincount works for any label
    # type; `classes` is sorted, so argmax ties resolve to the smallest label.
    classes, encoded = np.unique(labels_train, return_inverse=True)

    # Stable sort keeps the lowest training index first among equal distances,
    # which makes the selected neighbours deterministic.
    nearest = np.argsort(dists, axis=1, kind="stable")[:, :k]
    votes = encoded[nearest]

    winners = np.array(
        [np.bincount(row, minlength=classes.size).argmax() for row in votes],
        dtype=np.intp,
    )
    y_pred = classes[winners]

    # Preserve the historical int64 output for integer-like labels.
    if classes.dtype.kind in "bui":
        y_pred = y_pred.astype(np.int64)
    return y_pred
# evaluate_knn
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Evaluate a k-nearest-neighbours classifier on a test set.

    Arguments:
    - data_train: training data.
    - labels_train: corresponding labels.
    - data_test: test data.
    - labels_test: corresponding labels.
    - k: the number of neighbors.
    Returns:
    - Accuracy of the KNN model: the classification rate between predicted
      values and actual observations from test data.
    Raises:
    - ValueError: if the number of test labels differs from the number of
      predictions.
    """
    # Flatten so that a column vector of labels cannot broadcast against the
    # 1-D predictions and silently produce an (n, n) comparison.
    labels_test = np.asarray(labels_test).ravel()

    # Rows index test samples, columns index training samples.
    dists = distance_matrix(data_test, data_train)
    y_pred = np.asarray(knn_predict(dists, labels_train, k)).ravel()

    if y_pred.shape != labels_test.shape:
        raise ValueError(
            "got %d predictions but %d test labels"
            % (y_pred.shape[0], labels_test.shape[0])
        )

    accuracy = np.mean(y_pred == labels_test)
    return accuracy
# Plot Accuracy of KNN model
def plot_KNN(X_train, y_train, X_test, y_test, max_k=20):
    """Plot the variation of accuracy with the number of neighbors K.

    Accuracy is evaluated with ``evaluate_knn`` for every K from 1 to max_k
    (inclusive), and the resulting curve is saved to ``Results/knn.png``.

    Arguments:
    - X_train: training data.
    - y_train: training labels.
    - X_test: test data.
    - y_test: test labels.
    - max_k: largest number of neighbors to evaluate (default 20).
    """
    neighbors = np.arange(1, max_k + 1)
    accuracies = [
        evaluate_knn(X_train, y_train, X_test, y_test, k) for k in neighbors
    ]

    plt.plot(neighbors, accuracies, 'b-o')
    plt.xlabel('K')
    plt.ylabel('Accuracy')
    plt.title('Variation of Accuracy with K')

    # savefig does not create missing directories, so make sure the output
    # folder exists before writing.
    os.makedirs("Results", exist_ok=True)
    plt.savefig("Results/knn.png")