Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • bbrudysa/image-classification
1 result
Select Git revision
Loading items
Show changes
Commits on Source (9)
No preview for this file type
import numpy as np
import read_cifar
import matplotlib.pyplot as plt
def distance_matrix(A, B):
    """Compute the pairwise Euclidean distances between the rows of A and B.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, so the whole
    matrix comes from a single matrix product instead of a double loop.

    Args:
        A: 2-D array of shape (n, d).
        B: 2-D array of shape (m, d).

    Returns:
        Array of shape (n, m) whose entry (i, j) is the distance between
        A[i] and B[j].
    """
    sum_of_squares_A = np.sum(A**2, axis=1, keepdims=True)
    sum_of_squares_B = np.sum(B**2, axis=1, keepdims=True).T
    dot_product = np.dot(A, B.T)
    squared_dists = sum_of_squares_A + sum_of_squares_B - 2 * dot_product
    # Rounding in the expansion can push a (near-)zero squared distance
    # slightly below zero; clamp it so the square root never yields NaN.
    return np.sqrt(np.maximum(squared_dists, 0))
#def knn_predict(dists, labels_train, k):
#
#
def knn_predict(dists, labels_train, k):
    """Predict a label for each test sample by majority vote of its k nearest neighbours.

    Args:
        dists: 2-D array; row i holds the distances from test sample i to
            every training sample.
        labels_train: non-negative integer labels of the training samples,
            indexed like the columns of ``dists``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        1-D array with one predicted label per row of ``dists``. When several
        labels receive the same number of votes, the smallest label wins.
    """
    labels_train = np.asarray(labels_train)
    predicted_labels = []
    for row in dists:
        # Column indexes of the k closest training samples for this test sample.
        nearest = np.argsort(row)[:k]
        # One bin per label value; the table grows with the labels actually
        # seen instead of being fixed at ten classes. minlength keeps the
        # table non-empty even when no neighbour votes (k == 0).
        votes = np.bincount(labels_train[nearest], minlength=10)
        predicted_labels.append(np.argmax(votes))
    return np.array(predicted_labels)
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    """Return the accuracy of k-nearest-neighbour classification on a test set.

    Args:
        data_train: training samples, one per row.
        labels_train: labels of the training samples.
        data_test: test samples, one per row.
        labels_test: expected labels of the test samples.
        k: number of neighbours used for the vote.

    Returns:
        Fraction of test samples whose predicted label equals the expected
        label, as a float in [0, 1]. Returns 0.0 when the test set is empty.

    Raises:
        ValueError: if the number of predictions differs from the number of
            test labels.
    """
    dist_train_test = distance_matrix(data_test, data_train)
    prediction = knn_predict(dist_train_test, labels_train, k)
    labels_test = np.asarray(labels_test)
    # Fail loudly on a size mismatch instead of printing both lengths.
    if len(prediction) != len(labels_test):
        raise ValueError(
            f"got {len(prediction)} predictions for {len(labels_test)} test labels"
        )
    if len(prediction) == 0:
        return 0.0
    return float(np.mean(prediction == labels_test))
def knn_final():
    """Plot the k-NN accuracy on CIFAR-10 for k = 1..19.

    Loads the dataset from ``data/cifar-10-batches-py``, keeps 90 % of it for
    training and the rest for testing, measures the accuracy for every k and
    shows the resulting curve in a matplotlib window.
    """
    range_k = range(1, 20)
    data, labels = read_cifar.read_cifar("data/cifar-10-batches-py")
    data_train_f, labels_train_f, data_test_f, labels_test_f = read_cifar.split_dataset(data, labels, 0.9)

    # The distances do not depend on k: compute the matrix once instead of
    # once per value of k.
    dists = distance_matrix(data_test_f, data_train_f)

    rates = []
    for k in range_k:
        print(k)
        prediction = knn_predict(dists, labels_train_f, k)
        rates.append(float(np.mean(prediction == labels_test_f)))

    plt.figure(figsize=(10, 7))
    plt.xlabel('k')
    plt.ylabel('Accuracy rate')
    # The curve needs a label, otherwise plt.legend() has nothing to display.
    plt.plot(range_k, rates, label='Accuracy rate')
    plt.title("Accuracy rate = f(k)")
    plt.legend()
    plt.grid(True)
    plt.show()
if __name__ == "__main__":
    # Run the k-sweep experiment only when this file is executed as a script.
    knn_final()
#a1 = np.array([[0,0,1],[0,0,0],[1,1,2]])
#b1 = np.array([[1,3,1], [1,1,4], [1,5,1]])
#print(distance_matrix(a1,b1))
\ No newline at end of file
import numpy
import numpy as np
import pickle
def unpickle(file):
    """Load and return the object pickled in ``file``.

    The file is decoded with ``encoding='bytes'``, so the string keys of a
    Python 2 pickle come back as ``bytes`` (e.g. ``b'data'``).
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')


def read_cifar_batch(file):
    """Read one pickled batch file and return its samples and labels.

    Args:
        file: path of the batch file.

    Returns:
        A ``(data, labels)`` tuple: ``data`` is the ``b'data'`` entry as a
        float32 array and ``labels`` is the ``b'labels'`` entry as an int64
        array.
    """
    batch = unpickle(file)
    data = np.array(batch[b'data']).astype('float32')
    labels = np.array(batch[b'labels']).astype('int64')
    return data, labels
#vect1= read_cifar_batch("data/cifar-10-batches-py/data_batch_1")
#print(vect1)
def read_cifar(directory):
    """Load every batch file found under ``directory`` into two arrays.

    Reads ``data_batch_1`` .. ``data_batch_5`` followed by ``test_batch`` and
    stacks them in that order.

    Args:
        directory: folder that contains the batch files.

    Returns:
        A ``(data, labels)`` tuple with the samples and the labels of all six
        batches concatenated along the first axis.
    """
    batch_names = [f'data_batch_{i}' for i in range(1, 6)]
    batch_names.append('test_batch')

    batches = [read_cifar_batch(f'{directory}/{name}') for name in batch_names]
    data_parts, label_parts = zip(*batches)

    return (
        np.concatenate(data_parts, axis=0),
        np.concatenate(label_parts, axis=0),
    )
def split_dataset(data, labels, split):
    """Randomly split samples and labels into a training and a test part.

    Args:
        data: array of samples, one per entry along the first axis.
        labels: array of labels, aligned with ``data``.
        split: fraction of the samples that goes to the training part.

    Returns:
        A ``(data_train, labels_train, data_test, labels_test)`` tuple. The
        rows are drawn in a random order taken from NumPy's global random
        state, and each label stays attached to its sample.
    """
    n_samples = data.shape[0]
    cut = int(n_samples * split)

    # Shuffle the row indexes once, then cut the shuffled order in two.
    order = np.arange(n_samples)
    np.random.shuffle(order)
    train_idx, test_idx = order[:cut], order[cut:]

    return data[train_idx], labels[train_idx], data[test_idx], labels[test_idx]
# NOTE(review): these two statements sit outside the __main__ guard, so the
# first batch file is read every time this module is imported.
dict1 = unpickle("data/cifar-10-batches-py/data_batch_1")
data = list(dict1.values())

if __name__ == "__main__":
    # Quick manual check: size of the second entry of the first batch, then a
    # 60/40 split of the full dataset.
    print(len(data[1]))
    features, targets = read_cifar("data/cifar-10-batches-py")
    print(split_dataset(features, targets, 0.6))
results/knn.png

41.4 KiB