diff --git a/data/cifar-10-batches-py/batches.meta b/data/cifar-10-batches-py/batches.meta
new file mode 100644
index 0000000000000000000000000000000000000000..4467a6ec2e886a9f14f25e31776fb0152d8ac64a
Binary files /dev/null and b/data/cifar-10-batches-py/batches.meta differ
diff --git a/data/cifar-10-batches-py/data_batch_1 b/data/cifar-10-batches-py/data_batch_1
new file mode 100644
index 0000000000000000000000000000000000000000..ab404a5ac32492b807a5c6cd02b83dc4dd5ff980
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_1 differ
diff --git a/data/cifar-10-batches-py/data_batch_2 b/data/cifar-10-batches-py/data_batch_2
new file mode 100644
index 0000000000000000000000000000000000000000..6bf1369a6cacadfdbd2f8c61e354cc7d0c17bbae
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_2 differ
diff --git a/data/cifar-10-batches-py/data_batch_3 b/data/cifar-10-batches-py/data_batch_3
new file mode 100644
index 0000000000000000000000000000000000000000..66a0d630a7eb736563b1861ce716bdc489f2113b
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_3 differ
diff --git a/data/cifar-10-batches-py/data_batch_4 b/data/cifar-10-batches-py/data_batch_4
new file mode 100644
index 0000000000000000000000000000000000000000..cf8d03d1e80e6d9e440d1764faa85aedd1d6b960
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_4 differ
diff --git a/data/cifar-10-batches-py/data_batch_5 b/data/cifar-10-batches-py/data_batch_5
new file mode 100644
index 0000000000000000000000000000000000000000..468b2aa538c551bc9f590f213b19d96915b85062
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_5 differ
diff --git a/data/cifar-10-batches-py/readme.html b/data/cifar-10-batches-py/readme.html
new file mode 100644
index 0000000000000000000000000000000000000000..e377adef45c85dc91051edf2dee72c1d4d57732c
--- /dev/null
+++ b/data/cifar-10-batches-py/readme.html
@@ -0,0 +1 @@
+<meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
diff --git a/data/cifar-10-batches-py/test_batch b/data/cifar-10-batches-py/test_batch
new file mode 100644
index 0000000000000000000000000000000000000000..3e03f1fc5261d102600fc1c130454f1f5cda567b
Binary files /dev/null and b/data/cifar-10-batches-py/test_batch differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..e499927f04da9d76f601eb809fc64c3c0bd6ee72
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,28 @@
+import numpy as np
+
+
+def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
+    # Pairwise Euclidean distances via ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2
+    sum_squares_1 = np.sum(matrix_a**2, axis=1, keepdims=True)
+    sum_squares_2 = np.sum(matrix_b**2, axis=1, keepdims=True)
+
+    dot_product = np.dot(matrix_a, matrix_b.T)
+    # Clip tiny negative values caused by floating-point error before the sqrt
+    dists = np.sqrt(np.maximum(sum_squares_1 - 2*dot_product + sum_squares_2.T, 0))
+
+    return dists
+
+
+def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k: int):
+    # dists is assumed to have shape (num_test, num_train): one row per test sample.
+    # For each row, take the k nearest training samples and return the majority label.
+    nearest = np.argsort(dists, axis=1)[:, :k]
+    nearest_labels = labels_train[nearest]
+    return np.array([np.bincount(row).argmax() for row in nearest_labels])
+
+
+if __name__ == "__main__":
+    A = np.ones((3, 3))
+    B = np.ones((3, 3))*2
+    dist = distance_matrix(A, B)
+    print(dist)
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d1e7800d34c2962bc23fb25349e61409eac830a
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,57 @@
+import numpy as np
+import pickle
+
+
+def read_cifar_batch(path_of_batch: str):
+    # Load a single CIFAR-10 batch; the path is taken relative to data/cifar-10-batches-py/
+    with open("data/cifar-10-batches-py/" + path_of_batch, 'rb') as fo:
+        batch = pickle.load(fo, encoding='bytes')
+
+    data = np.array(batch[b'data'], dtype=np.float32)
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+    return data, labels
+
+
+def read_cifar():
+    directory = "data/cifar-10-batches-py/"
+    data = []
+    labels = []
+    # Load the 5 training batches
+    for i in range(5):
+        path_batch = directory + 'data_batch_' + str(i+1)
+        with open(path_batch, 'rb') as fo:
+            batch = pickle.load(fo, encoding='bytes')
+        data.append(batch[b'data'])
+        labels.append(batch[b'labels'])
+
+    # Load the test batch as well
+    path_test_batch = directory + 'test_batch'
+    with open(path_test_batch, 'rb') as fo:
+        batch = pickle.load(fo, encoding='bytes')
+    data.append(batch[b'data'])
+    labels.append(batch[b'labels'])
+
+    data = np.array(data, np.float32)
+    labels = np.array(labels, np.int64)
+    # Flatten the per-batch axis: (6, 10000, 3072) -> (60000, 3072) and (6, 10000) -> (60000,)
+    return np.reshape(data, (-1, np.size(data, 2))), np.reshape(labels, (-1,))
+
+
+def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):
+    # Shuffle, then keep the first `split` fraction for training and the rest for testing
+    shuffled_index = np.random.permutation(np.size(data, 0))
+    shuffled_data = data[shuffled_index]
+    shuffled_labels = labels[shuffled_index]
+    n_train = int(np.size(data, 0)*split)
+    data_train = shuffled_data[:n_train]
+    labels_train = shuffled_labels[:n_train]
+    data_test = shuffled_data[n_train:]
+    labels_test = shuffled_labels[n_train:]
+
+    return data_train, labels_train, data_test, labels_test
+
+
+if __name__ == "__main__":
+    data, labels = read_cifar()
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)
+    print(data_train.shape, labels_train.shape, data_test.shape, labels_test.shape)
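
Usage reference (not part of the diff itself): a minimal sketch of how the two new modules could be combined to measure k-NN accuracy on a held-out split. The evaluate_knn helper, the k=5 value, and the 0.9 split fraction are illustrative assumptions, not code from this change.

import numpy as np
from read_cifar import read_cifar, split_dataset
from knn import distance_matrix, knn_predict

def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    # Rows of the distance matrix are test samples, columns are training samples,
    # matching the (num_test, num_train) layout expected by knn_predict.
    dists = distance_matrix(data_test, data_train)
    predictions = knn_predict(dists, labels_train, k)
    # Classification accuracy: fraction of test samples with a correct prediction
    return np.mean(predictions == labels_test)

if __name__ == "__main__":
    data, labels = read_cifar()
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
    print("k=5 accuracy:", evaluate_knn(data_train, labels_train, data_test, labels_test, k=5))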