diff --git a/knn.py b/knn.py index 6c1936149616bec516899f95137c135bf316a821..6dc8f75d47dfa2ce56846eaee210bde64c8d8b3a 100644 --- a/knn.py +++ b/knn.py @@ -9,63 +9,65 @@ from sklearn.neighbors import KNeighborsRegressor import matplotlib.pyplot as plt -def distance_matrix(data_test,data_train): +def distance_matrix(data_test, data_train): + """Takes the matrices data_test and data_train. It returns a 2d array(N,M) such that dists[i,j] represents + the distance between the i-th data_test row and the j-th data_train row + """ + dists = np.array([np.sum((data_train - l) ** 2, axis=1) ** 0.5 for l in data_test]) - dists = np.array([np.sum((data_train-l)**2,axis=1)**.5 for l in data_test]) + return dists - return dists -#receives a 2d array data_train(M,k) and a data_test (N,k), -#returning a 2d array(N,M) such that dists[i,j] represents -#the distance between the i-th data_test row and the j-th data_train row -#in resume, each column represent a distance of a training point to all other -def knn_predict(dists , labels_train , k): - #classif = np.array(0) - print(labels_train[:20]) - print(labels_train.size) - classif = [] +def knn_predict(dists, labels_train, k): + """Takes the matrix of distances dists, the labels for training and the number k of nearest neighbors. + It returns the classification given by the KNN module. 
+ """ + # classif = np.array(0) + print(labels_train[:20]) + print(labels_train.size) + classif = [] - for testRows in dists.T: - - distances = np.stack((testRows,labels_train),axis = 1) - distances = distances[distances[:, 0].argsort()] - #for picturesClasses in distances[:k,1]: - countArray = [np.count_nonzero(distances[:k,1]==i) for i in range(0,10)] - classif = np.append(classif,np.argmax(countArray)) + for testRows in dists.T: - classif = np.array(classif , dtype = int) + distances = np.stack((testRows, labels_train), axis=1) + distances = distances[distances[:, 0].argsort()] + # for picturesClasses in distances[:k,1]: + countArray = [np.count_nonzero(distances[:k, 1] == i) for i in range(0, 10)] + classif = np.append(classif, np.argmax(countArray)) - return classif + classif = np.array(classif, dtype=int) -def evaluate_knn(data_train,labels_train,data_test,labels_test,k): + return classif - classif = np.array(knn_predict(distance_matrix(data_train,data_test) , labels_train , k)) - result = np.array(classif == labels_test) - acc = np.count_nonzero(result) / np.size(result) - return acc*100 +def evaluate_knn(data_train, labels_train, data_test, labels_test, k): + """Receives the data and labels for training and testing, and the number k of nearest neighbors. 
+ It returns the accuracy of the KNN module""" + classif = np.array( + knn_predict(distance_matrix(data_train, data_test), labels_train, k) + ) + result = np.array(classif == labels_test) + acc = np.count_nonzero(result) / np.size(result) + return acc * 100 -datas,labels = read_cifar_batch('data_batch_1') -print(datas.shape,labels.shape) -dataTrain,dataTest,labelsTrain,labelsTest = split_dataset(datas,labels) -print(dataTrain.shape,dataTest.shape,labelsTrain.shape) -distanceMatrix = distance_matrix(dataTrain,dataTest) -print(distanceMatrix.shape) +datas, labels = read_cifar_batch("data_batch_1") +dataTrain, dataTest, labelsTrain, labelsTest = split_dataset(datas, labels) +distanceMatrix = distance_matrix(dataTrain, dataTest) print() result = [] -for i in range (1,21): - result = np.append(result,evaluate_knn(dataTrain,labelsTrain,dataTest,labelsTest,i)) +for i in range(1, 21): + result = np.append( + result, evaluate_knn(dataTrain, labelsTrain, dataTest, labelsTest, i) + ) x = np.arange(1, 21) - -# plotting + +# plot the graph of (Accuracy) x k plt.title("Plot graph") plt.xlabel("K neighbors") plt.ylabel("Accuracy %") -plt.plot(x, result, color ="red") +plt.plot(x, result, color="red") plt.show() - -