# -*- coding: utf-8 -*-
"""
k-nearest-neighbours classification evaluated on the CIFAR-10 dataset.

Created on Fri Oct 20 17:39:37 2023

@author: oscar
"""

import statistics
import time
from statistics import mode

import numpy as np


def _as_float_array(values):
    """Return ``values`` as an ndarray, converting non-floating dtypes to float64."""
    arr = np.asarray(values)
    if not np.issubdtype(arr.dtype, np.floating):
        # Squaring integer data (e.g. uint8 pixels) would silently overflow.
        arr = arr.astype(np.float64)
    return arr


def distance_matrix(A, B):
    """Return the Euclidean distance between every row of A and every row of B.

    The computation uses the expansion
    ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so that no Python-level
    loop over the rows is needed.

    Args:
        A: 2-D array-like of shape (n_a, d).
        B: 2-D array-like of shape (n_b, d).

    Returns:
        ndarray of shape (n_a, n_b); entry ``[i, j]`` is the distance between
        ``A[i]`` and ``B[j]``.
    """
    A = _as_float_array(A)
    B = _as_float_array(B)

    # Column vector of squared norms for A, row vector for B: their sum
    # broadcasts to the full (n_a, n_b) grid.
    sq_norms_a = np.sum(A ** 2, axis=1, keepdims=True)
    sq_norms_b = np.sum(B ** 2, axis=1)
    cross_terms = A @ B.T

    squared = sq_norms_a + sq_norms_b - 2 * cross_terms
    # Rounding errors can leave tiny negative values (typically where the two
    # rows are almost identical); clamp them so the square root is never NaN.
    np.maximum(squared, 0, out=squared)
    return np.sqrt(squared, out=squared)


def knn_predict(dists, labels_train, k):
    """Predict a label for every test sample by majority vote of its k nearest neighbours.

    Args:
        dists: 2-D array of shape (number_train, number_test); ``dists[i, j]``
            is the distance between training sample i and test sample j.
        labels_train: 1-D array-like with the label of each training sample.
        k: number of neighbours taking part in the vote (must be >= 1).

    Returns:
        1-D float ndarray of length number_test holding the predicted labels.
        Ties in the vote are resolved by ``statistics.mode``.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    dists = np.asarray(dists)
    labels_train = np.asarray(labels_train)
    number_test = dists.shape[1]

    labels_predicted = np.zeros(number_test)
    for j in range(number_test):
        # Indices of the k training samples closest to test sample j.
        knn_indices = np.argsort(dists[:, j])[:k]
        labels_predicted[j] = mode(labels_train[knn_indices])
    return labels_predicted


def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the fraction of test samples that k-NN classifies correctly.

    Args:
        data_train: 2-D array-like of training samples, one per row.
        labels_train: 1-D array-like of training labels.
        data_test: 2-D array-like of test samples, one per row.
        labels_test: 1-D array-like of the true test labels.
        k: number of neighbours used for the vote.

    Returns:
        Number of correct predictions divided by the number of test labels.
    """
    labels_test = np.asarray(labels_test)
    dists = distance_matrix(data_train, data_test)
    labels_predicted = knn_predict(dists, labels_train, k)

    number_true_prediction = np.sum(labels_test == labels_predicted)
    number_total_prediction = labels_test.shape[0]
    return number_true_prediction / number_total_prediction


if __name__ == "__main__":
    # Script-only dependencies are imported here so that the k-NN helpers
    # above can be imported without the dataset loader or matplotlib.
    # matplotlib is not used by the statements below; the import is kept
    # from the original script.
    import matplotlib.pyplot as plt  # noqa: F401
    import read_cifar

    t1 = time.time()

    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
        data, labels, 0.9
    )
    k = 10

    print(len(data_train))
    print(len(data_test))
    print(len(data_train[0]))
    print(len(data_test[0]))

    classification_rate = evaluate_knn(
        data_train, labels_train, data_test, labels_test, k
    )
    print("classification rate :", classification_rate)

    t2 = time.time()
    print('run time (second): ')
    print(t2 - t1)