Commit 4eba64b5 authored by Muniz Silva Samuel

final commit

parent cdb0f72c
@@ -9,20 +9,34 @@ from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
#
# ATTENTION: THIS CODE IS A CONCATENATION OF read_cifar.py AND knn.py
#


def unpickle(file):
    """Unpack a CIFAR-10 batch stored as a pickle file. Returns a dictionary
    with the dataset and its labels."""
    import pickle

    with open(file, "rb") as fo:
        dict = pickle.load(fo, encoding="bytes")
    return dict


def read_cifar_batch(path):
    """Take the path of a single batch as a string and return:
    a matrix data of size (batch_size x data_size) and a vector labels of size batch_size."""
    dictionary = unpickle(path)
    data = np.array(dictionary[b"data"], dtype=np.float32)
    labels = np.array(dictionary[b"labels"], dtype=np.int64)
    return data, labels


def read_cifar(path1, path2, path3, path4, path5, path6):
    """Take as parameters the paths of the six batch files
    (five data_batch and one test_batch) as strings, and return:
    a matrix data of shape (batch_size x data_size) and a vector labels of size batch_size."""
    data, labels = read_cifar_batch(path1)
    dataAux, labelsAux = read_cifar_batch(path2)
    data = np.concatenate((data, dataAux), 0)
@@ -43,25 +57,28 @@ def read_cifar(path1,path2,path3,path4,path5,path6):
    return data, labels
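

# Illustrative sketch (not part of this commit): read_cifar repeats the same
# concatenation for all six batches (the middle of the function is elided in
# this diff view); the same pattern can be written as a loop over a list of
# paths. The name read_cifar_from_paths is hypothetical.
def read_cifar_from_paths(paths):
    data, labels = read_cifar_batch(paths[0])
    for path in paths[1:]:
        dataAux, labelsAux = read_cifar_batch(path)
        data = np.concatenate((data, dataAux), 0)
        labels = np.concatenate((labels, labelsAux), 0)
    return data, labels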


def split_dataset(data, labels):
    """Split the dataset into a training set and a test set."""
    data_train, data_test, labels_train, labels_test = train_test_split(
        data, labels, shuffle=True, test_size=0.1
    )
    return data_train, data_test, labels_train, labels_test


def distance_matrix(data_test, data_train):
    """Take the matrices data_test (N, k) and data_train (M, k) and return a 2-D array (N, M)
    such that dists[i, j] is the Euclidean distance between the i-th data_test row
    and the j-th data_train row; in other words, each column holds the distances of one
    training point to every test point.
    """
    dists = np.array([np.sum((data_train - l) ** 2, axis=1) ** 0.5 for l in data_test])
    return dists
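

# Illustrative sketch (not part of this commit): the same (N, M) matrix can be
# computed without the Python-level loop by expanding
# ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. The name distance_matrix_vectorized
# is hypothetical.
def distance_matrix_vectorized(data_test, data_train):
    test_sq = np.sum(data_test ** 2, axis=1)[:, np.newaxis]    # shape (N, 1)
    train_sq = np.sum(data_train ** 2, axis=1)[np.newaxis, :]  # shape (1, M)
    cross = data_test @ data_train.T                           # shape (N, M)
    # clip tiny negative values caused by floating-point error before the sqrt
    return np.sqrt(np.maximum(test_sq - 2 * cross + train_sq, 0))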


def knn_predict(dists, labels_train, k):
    """Take the matrix of distances dists, the training labels, and the number of
    neighbors k. Return the classification produced by the KNN model.
    """
    # classif = np.array(0)
    print(labels_train[:20])
    print(labels_train.size)
@@ -79,34 +96,36 @@ def knn_predict(dists , labels_train , k):
    return classif
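

# The body of knn_predict is elided in this diff view. For illustration only
# (this is not necessarily the commit's implementation), one common way to
# classify from a distance matrix of shape (n_test, n_train), as described in
# the docstring of distance_matrix, is to take the k smallest distances per row
# and do a majority vote. The name knn_predict_sketch is hypothetical.
def knn_predict_sketch(dists, labels_train, k):
    nearest = np.argsort(dists, axis=1)[:, :k]  # indices of the k closest training points
    votes = labels_train[nearest]               # their labels, shape (n_test, k)
    return np.array([np.bincount(row).argmax() for row in votes])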


def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Take the data and labels for training and test and the number of neighbors k.
    Return the accuracy of the KNN model."""
    classif = np.array(
        knn_predict(distance_matrix(data_train, data_test), labels_train, k)
    )
    result = np.array(classif == labels_test)
    acc = np.count_nonzero(result) / np.size(result)
    return acc * 100


datas, labels = read_cifar_batch("data_batch_1")
dataTrain, dataTest, labelsTrain, labelsTest = split_dataset(datas, labels)
distanceMatrix = distance_matrix(dataTrain, dataTest)
print(distanceMatrix.shape)
print()

result = []
# Evaluate the KNN model for k ranging from 1 to 20
for i in range(1, 21):
    result = np.append(
        result, evaluate_knn(dataTrain, labelsTrain, dataTest, labelsTest, i)
    )
x = np.arange(1, 21)
# plot the graph of accuracy (%) vs. k
plt.title("Plot graph")
plt.xlabel("K neighbors")
plt.ylabel("Accuracy %")
plt.plot(x, result, color="red")
plt.show()