Skip to content
Snippets Groups Projects
Select Git revision
  • e5b01982650b55893e5009fb4834cc0ec6545db9
  • master default protected
2 results

Pile.py

Blame
  • Forked from Vuillemot Romain / INF-TC1
    Source project has a limited visibility.
    knn.py 2.23 KiB
    import numpy as np
    import os
    import pickle
    import matplotlib.pyplot as plt
    import plotly.graph_objects as go
    
    
    
    # Create distance matrix
    def distance_matrix(X, Y):
        """Return the pairwise L2 (Euclidean) distance matrix between rows.

        The computation uses only matrix operations, through the expansion
        ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x.y (no Python loops).

        Arguments:
        - X: array-like of shape (n, d), one sample per row.
        - Y: array-like of shape (m, d), one sample per row.

        Returns:
        - dists: array of shape (n, m); dists[i, j] is the Euclidean distance
          between X[i] and Y[j].
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        # Integer inputs (e.g. uint8 pixel values) would overflow or wrap around
        # when squared, so promote them; float inputs keep their precision.
        if not np.issubdtype(X.dtype, np.floating):
            X = X.astype(np.float64)
        if not np.issubdtype(Y.dtype, np.floating):
            Y = Y.astype(np.float64)

        XX = np.sum(X ** 2, axis=1, keepdims=True)
        YY = np.sum(Y ** 2, axis=1, keepdims=True)
        sq_dists = XX + YY.T - 2 * (X @ Y.T)
        # Floating-point cancellation can leave tiny negative values where the
        # true squared distance is ~0; clamp them so the square root is defined.
        np.maximum(sq_dists, 0, out=sq_dists)
        return np.sqrt(sq_dists, out=sq_dists)
    
    # KNN predict
    def knn_predict(dists, labels_train, k):
        """Predict one label per row of a distance matrix by majority vote.

        Arguments:
        - dists: the distance matrix between the test set and the training set;
          row i holds the distances from test sample i to each training sample.
        - labels_train: training labels, indexed by training-sample position.
          They are counted with np.bincount, so they must be non-negative
          integers.
        - k: the number of neighbors that take part in the vote.

        Returns:
        - int64 array holding the predicted label for each row of dists. When
          several labels are tied, the smallest one is returned (np.argmax keeps
          the first maximum).
        """
        def majority_label(row):
            # Positions of the k smallest distances in this row.
            nearest = np.argsort(row)[:k]
            votes = np.bincount(labels_train[nearest])
            return votes.argmax()

        return np.array([majority_label(row) for row in dists], dtype=np.int64)
    
    # evaluate_knn
    def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
        """Measure the accuracy of k-nearest-neighbors on a test set.

        Arguments:
        - data_train: training data.
        - labels_train: labels corresponding to data_train.
        - data_test: test data.
        - labels_test: labels corresponding to data_test.
        - k: the number of neighbors.

        Returns:
        - Accuracy of the KNN model: the fraction of test samples whose predicted
          label equals the label in labels_test.
        """
        # Rows index test samples, columns index training samples.
        test_to_train = distance_matrix(data_test, data_train)
        predictions = knn_predict(test_to_train, labels_train, k)
        hits = predictions == labels_test
        return np.mean(hits)
    
    # Plot accuracy of KNN model
    def plot_KNN(X_train, y_train, X_test, y_test, max_k=20, save_path="Results/knn.png"):
        """Plot the variation of test accuracy with the number of neighbors K.

        Accuracy is evaluated for every K from 1 to max_k (inclusive), drawn on
        the current matplotlib figure and saved to save_path.

        Arguments:
        - X_train: training data.
        - y_train: training labels.
        - X_test: test data.
        - y_test: test labels.
        - max_k: largest number of neighbors to evaluate.
        - save_path: file the figure is written to; its parent directory is
          created if it does not exist yet.
        """
        neighbors = np.arange(1, max_k + 1)
        # The test-to-train distances do not depend on K, so compute them once
        # instead of once per K; each accuracy is computed exactly as in
        # evaluate_knn.
        dists = distance_matrix(X_test, X_train)
        accuracies = [
            np.mean(knn_predict(dists, y_train, k) == y_test) for k in neighbors
        ]

        plt.plot(neighbors, accuracies, 'b-o')
        plt.xlabel('K')
        plt.ylabel('Accuracy')
        plt.title('Variation of Accuracy with K')

        # savefig does not create missing directories; make sure the target
        # folder exists so a fresh checkout does not fail after all the work.
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(save_path)