knn.py
    import numpy as np
    from read_cifar import read_cifar, split_dataset
    import matplotlib.pyplot as plt
    
    
    def distance_matrix(mat1, mat2):
        # pairwise Euclidean distances via the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
        # squared norms of each row, kept as column vectors for broadcasting
        square1 = np.sum(np.square(mat1), axis=1, keepdims=True)
        square2 = np.sum(np.square(mat2), axis=1, keepdims=True)
        # dot products between every pair of rows
        prod = np.dot(mat1, mat2.T)
        # broadcasting builds the (len(mat1), len(mat2)) matrix of distances
        dists = np.sqrt(square1 + square2.T - 2 * prod)
        return dists
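
    # Illustrative sketch (not in the original file): a naive double-loop version of the
    # Euclidean distance, useful for checking distance_matrix on small arrays.
    # The helper name _distance_matrix_naive is hypothetical.
    def _distance_matrix_naive(mat1, mat2):
        dists = np.zeros((len(mat1), len(mat2)))
        for i in range(len(mat1)):
            for j in range(len(mat2)):
                # direct definition: square root of the summed squared differences
                dists[i, j] = np.sqrt(np.sum(np.square(mat1[i] - mat2[j])))
        return dists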
    
    def knn_predict(dists, labels_train, k):
        # prediction vector initialization
        predicted_labels = np.zeros(len(dists))
        # loop over all the test images
        for i in range(len(dists)):
            # sort the distances for test image i and keep the k smallest
            sorted_dists = np.argsort(dists[i])
            k_sorted_dists = sorted_dists[:k]
            # get the matching training labels
            closest_labels = labels_train[k_sorted_dists]
            # majority vote: keep the most common label among the k neighbors
            uniques, counts = np.unique(closest_labels, return_counts=True)
            predicted_labels[i] = uniques[np.argmax(counts)]
        return predicted_labels
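
    # Worked example (sketch, not in the original), using toy arrays similar to the
    # commented-out test at the bottom of this file:
    #   dists = distance_matrix(np.array([[1, 2]]), np.array([[2, 4], [7, 2], [4, 6]]))
    #   labels_train = np.array([0, 1, 1])
    # knn_predict(dists, labels_train, k=1) returns array([0.]) because the nearest
    # training point [2, 4] carries label 0, while k=3 votes over the labels [0, 1, 1]
    # and returns array([1.]).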
    
    def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
        dists = distance_matrix(data_test, data_train)
        # Determine the number of images in data_test
        tot = len(data_test)
        accurate = 0
        predicted_labels = knn_predict(dists, labels_train, k)
        # Count the number of images in data_test whose label has been estimated correctly
        for i in range(tot):
            if predicted_labels[i] == labels_test[i]:
                accurate += 1
        # Calculate the classification rate
        accuracy = accurate/tot
        return accuracy
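
    # Equivalent vectorized accuracy (sketch, same result as the loop above):
    #   accuracy = np.mean(predicted_labels == labels_test)
    # NumPy compares the two label vectors elementwise and the mean of the resulting
    # boolean array is the fraction of correctly classified test images.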
    
    
    if __name__ == "__main__":
    
        data, labels = read_cifar("./data/cifar-10-batches-py")
        data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
        
        k_list = list(range(1, 21))
        accuracy = [evaluate_knn(data_train, labels_train, data_test, labels_test, k) for k in k_list]

        plt.plot(k_list, accuracy)
        plt.title("Accuracy of the k-nearest-neighbors classifier for k from 1 to 20")
        plt.xlabel("k value")
        plt.ylabel("Accuracy")
        plt.grid(True, which='both')
        plt.savefig("results/knn.png")
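
        # Possible optimization (sketch, not in the original script): evaluate_knn
        # recomputes the full test/train distance matrix for every k. Computing it
        # once and reusing it for all k gives the same accuracies much faster:
        #   dists = distance_matrix(data_test, data_train)
        #   accuracy = [np.mean(knn_predict(dists, labels_train, k) == labels_test)
        #               for k in k_list]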
    
    
        # x_test = np.array([[1,2],[4,6]])
        # x_labels_test = np.array([0,1])
        # x_train = np.array([[2,4],[7,2],[4,6]])
        # x_labels_train = np.array([0,1,1])
    
        # dist = distance_matrix(x_test, x_train)
        # accuracy = evaluate_knn(x_train, x_labels_train, x_test, x_labels_test, 1)
        # print(accuracy)