Upload New File

5d0cd40f · Muniz Silva Samuel · ff0ad89d · 5d0cd40f
Commit 5d0cd40f authored 2 years ago by Muniz Silva Samuel
--- a/read_cifar.py
+++ b/read_cifar.py
+import numpy as np
+import tensorflow as tf
+import pandas as pd
+import pickle
+import os
+import scipy
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsRegressor
+import matplotlib.pyplot as plt
+
+
+def unpickle(file):
+    import pickle
+    with open(file, 'rb') as fo:
+        dict = pickle.load(fo, encoding='bytes')
+    return dict
+
+def read_cifar_batch(path):
+  dictionary = unpickle(path)
+  data = np.array(dictionary[b'data'], dtype = np.float32)
+  labels = np.array(dictionary[b'labels'], dtype = np.int64)
+  
+  return data , labels
+
+def read_cifar(path1,path2,path3,path4,path5,path6):
+  data , labels = read_cifar_batch(path1)
+  dataAux , labelsAux = read_cifar_batch(path2)
+  data = np.concatenate((data,dataAux),0)
+  labels = np.concatenate((labels,labelsAux))
+  dataAux , labelsAux = read_cifar_batch(path3)
+  data = np.concatenate((data,dataAux),0)
+  labels = np.concatenate((labels,labelsAux))
+  dataAux , labelsAux = read_cifar_batch(path4)
+  data = np.concatenate((data,dataAux),0)
+  labels = np.concatenate((labels,labelsAux))
+  dataAux , labelsAux = read_cifar_batch(path5)
+  data = np.concatenate((data,dataAux),0)
+  labels = np.concatenate((labels,labelsAux))
+  dataAux , labelsAux = read_cifar_batch(path6)
+  data = np.concatenate((data,dataAux),0)
+  labels = np.concatenate((labels,labelsAux))
+
+  return data , labels
+
+
+
+def split_dataset(data , labels):
+
+  data_train, data_test , labels_train , labels_test = train_test_split(data, labels,shuffle = True ,test_size = 0.1)
+  
+  return data_train, data_test , labels_train , labels_test
+
+
+def distance_matrix(data_test,data_train):
+
+  dists = np.array([np.sum((data_train-l)**2,axis=1)**.5 for l in data_test])
+
+  return dists
+#receives a 2d array data_train(M,k) and a data_test (N,k), 
+#returning a 2d array(N,M) such that dists[i,j] represents 
+#the distance between the i-th data_test row and the j-th data_train row
+#in resume,  each column represent a distance of a training point to all other
+
+def knn_predict(dists , labels_train , k):
+  #classif = np.array(0)
+  print(labels_train[:20])
+  print(labels_train.size)
+  classif = []
+
+  for testRows in dists.T:
+    
+    distances = np.stack((testRows,labels_train),axis = 1)
+    distances = distances[distances[:, 0].argsort()]
+    #for picturesClasses in distances[:k,1]:
+    countArray = [np.count_nonzero(distances[:k,1]==i) for i in range(0,10)]
+    classif = np.append(classif,np.argmax(countArray))     
+
+  classif = np.array(classif , dtype = int)    
+
+  return classif
+
+def evaluate_knn(data_train,labels_train,data_test,labels_test,k):
+
+  classif = np.array(knn_predict(distance_matrix(data_train,data_test) , labels_train , k)) 
+  result = np.array(classif == labels_test)
+  acc = np.count_nonzero(result) / np.size(result)
+
+  return acc*100
+
+
+
+datas,labels = read_cifar_batch('data_batch_1')
+print(datas.shape,labels.shape)
+dataTrain,dataTest,labelsTrain,labelsTest = split_dataset(datas,labels)
+print(dataTrain.shape,dataTest.shape,labelsTrain.shape)
+distanceMatrix = distance_matrix(dataTrain,dataTest)
+print(distanceMatrix.shape)
+print()
+
+result = []
+for i in range (1,21):
+  result = np.append(result,evaluate_knn(dataTrain,labelsTrain,dataTest,labelsTest,i)) 
+
+x = np.arange(1, 21)
+ 
+# plotting
+plt.title("Plot graph")
+plt.xlabel("K neighbors")
+plt.ylabel("Accuracy %")
+plt.plot(x, result, color ="red")
+plt.show()
+