Skip to content
Snippets Groups Projects
Select Git revision
  • 5cba93d25eb47a677c14a56ef90a1e6176d544c2
  • master default protected
2 results

test_lifoqueue.py

Blame
  • knn.py 3.05 KiB
    # -*- coding: utf-8 -*-
    """
    Created on Fri Oct 20 17:39:37 2023
    
    @author: oscar
    """
    import read_cifar
    import numpy as np
    import statistics
    from statistics import mode
    import time
    import matplotlib.pyplot as plt
    
    def distance_matrix(A, B):
        """Return the pairwise Euclidean distances between the rows of A and B.

        Uses the expansion ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so the
        whole matrix is computed with one matrix product instead of a Python
        loop over pairs.

        Parameters
        ----------
        A : array-like of shape (n_a, d)
        B : array-like of shape (n_b, d)

        Returns
        -------
        numpy.ndarray of shape (n_a, n_b)
            Entry ``[i, j]`` is the distance between ``A[i]`` and ``B[j]``.
        """
        A = np.asarray(A)
        B = np.asarray(B)
        # Integer inputs (e.g. uint8 pixel values) would silently overflow when
        # squared, so promote anything that is not already floating point.
        if not np.issubdtype(A.dtype, np.floating):
            A = A.astype(np.float64)
        if not np.issubdtype(B.dtype, np.floating):
            B = B.astype(np.float64)

        # Column vector (n_a, 1) and row vector (n_b,) broadcast to (n_a, n_b).
        sq_norms_a = np.sum(A ** 2, axis=1, keepdims=True)
        sq_norms_b = np.sum(B ** 2, axis=1)
        cross_terms = A @ B.T

        sq_dists = sq_norms_a + sq_norms_b - 2 * cross_terms
        # Round-off can leave tiny negative values for (near-)identical rows;
        # clamp them so the square root never produces NaN.
        np.maximum(sq_dists, 0, out=sq_dists)
        return np.sqrt(sq_dists)
    
    def knn_predict(dists, labels_train, k):
        """Predict a label for every test sample by majority vote of its k nearest neighbours.

        Parameters
        ----------
        dists : numpy.ndarray of shape (number_train, number_test)
            ``dists[i, j]`` is the distance between training sample ``i`` and
            test sample ``j``.
        labels_train : array-like of shape (number_train,)
            Label of each training sample.
        k : int
            Number of neighbours that vote; must be at least 1.

        Returns
        -------
        numpy.ndarray of shape (number_test,)
            Predicted label for each test sample.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        labels_train = np.asarray(labels_train)
        number_train, number_test = dists.shape

        # Initialize the predicted labels to zeros.
        labels_predicted = np.zeros(number_test)

        for j in range(number_test):
            # Stable sort keeps equidistant neighbours in training order, so
            # the result is deterministic.
            sorted_indices = np.argsort(dists[:, j], kind="stable")
            knn_labels = labels_train[sorted_indices[:k]]
            # The labels are ordered nearest first; mode() returns the first
            # most-common value it meets (Python 3.8+), so a tied vote goes to
            # the label of the closest neighbour.
            labels_predicted[j] = mode(knn_labels)

        return labels_predicted
    
    def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
        """Run k-NN on the test data and return the fraction of correct predictions.

        The distance matrix between the training and test samples is built with
        ``distance_matrix``, labels are predicted with ``knn_predict``, and the
        predictions are compared element-wise against ``labels_test``.
        """
        predictions = knn_predict(
            distance_matrix(data_train, data_test),
            labels_train,
            k,
        )
        # Count of test samples whose predicted label equals the true label.
        correct = np.sum(labels_test == predictions)
        total = labels_test.shape[0]
        return correct / total
        
    if __name__ == "__main__" :
        # Time the whole run: loading, splitting and evaluating.
        start_time = time.time()

        # Load the dataset and split it with a 0.9 ratio
        # (NOTE(review): presumably 90% train / 10% test - confirm in read_cifar).
        cifar_dir = "./data/cifar-10-python/"
        data, labels = read_cifar.read_cifar(cifar_dir)
        data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
            data, labels, 0.9
        )
        k = 10

        # Sanity-check output: number of samples, then length of one sample.
        for subset in (data_train, data_test):
            print(len(subset))
        for subset in (data_train, data_test):
            print(len(subset[0]))

        classification_rate = evaluate_knn(
            data_train, labels_train, data_test, labels_test, k
        )
        print("classification rate :", classification_rate)

        end_time = time.time()
        print('run time (second): ')
        print(end_time - start_time)