diff --git a/data/cifar-10-batches-py/batches.meta b/data/cifar-10-batches-py/batches.meta
new file mode 100644
index 0000000000000000000000000000000000000000..4467a6ec2e886a9f14f25e31776fb0152d8ac64a
Binary files /dev/null and b/data/cifar-10-batches-py/batches.meta differ
diff --git a/data/cifar-10-batches-py/data_batch_1 b/data/cifar-10-batches-py/data_batch_1
new file mode 100644
index 0000000000000000000000000000000000000000..ab404a5ac32492b807a5c6cd02b83dc4dd5ff980
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_1 differ
diff --git a/data/cifar-10-batches-py/data_batch_2 b/data/cifar-10-batches-py/data_batch_2
new file mode 100644
index 0000000000000000000000000000000000000000..6bf1369a6cacadfdbd2f8c61e354cc7d0c17bbae
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_2 differ
diff --git a/data/cifar-10-batches-py/data_batch_3 b/data/cifar-10-batches-py/data_batch_3
new file mode 100644
index 0000000000000000000000000000000000000000..66a0d630a7eb736563b1861ce716bdc489f2113b
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_3 differ
diff --git a/data/cifar-10-batches-py/data_batch_4 b/data/cifar-10-batches-py/data_batch_4
new file mode 100644
index 0000000000000000000000000000000000000000..cf8d03d1e80e6d9e440d1764faa85aedd1d6b960
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_4 differ
diff --git a/data/cifar-10-batches-py/data_batch_5 b/data/cifar-10-batches-py/data_batch_5
new file mode 100644
index 0000000000000000000000000000000000000000..468b2aa538c551bc9f590f213b19d96915b85062
Binary files /dev/null and b/data/cifar-10-batches-py/data_batch_5 differ
diff --git a/data/cifar-10-batches-py/readme.html b/data/cifar-10-batches-py/readme.html
new file mode 100644
index 0000000000000000000000000000000000000000..e377adef45c85dc91051edf2dee72c1d4d57732c
--- /dev/null
+++ b/data/cifar-10-batches-py/readme.html
@@ -0,0 +1 @@
+<meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
diff --git a/data/cifar-10-batches-py/test_batch b/data/cifar-10-batches-py/test_batch
new file mode 100644
index 0000000000000000000000000000000000000000..3e03f1fc5261d102600fc1c130454f1f5cda567b
Binary files /dev/null and b/data/cifar-10-batches-py/test_batch differ
diff --git a/knn.py b/knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..e499927f04da9d76f601eb809fc64c3c0bd6ee72
--- /dev/null
+++ b/knn.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+
+def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray) -> np.ndarray:
+    """Return the pairwise Euclidean distance matrix between rows of two matrices.
+
+    Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2 so the whole
+    computation is one matrix product instead of an explicit double loop.
+
+    Args:
+        matrix_a: array of shape (n, d).
+        matrix_b: array of shape (m, d).
+
+    Returns:
+        Array of shape (n, m) whose entry (i, j) is the Euclidean distance
+        between matrix_a[i] and matrix_b[j].
+    """
+    sum_squares_a = np.sum(matrix_a**2, axis=1, keepdims=True)
+    sum_squares_b = np.sum(matrix_b**2, axis=1, keepdims=True)
+    dot_product = np.dot(matrix_a, matrix_b.T)
+    # Floating-point cancellation can push the expansion slightly below zero;
+    # clamp at 0 so np.sqrt never produces NaN for near-identical rows.
+    squared = np.maximum(sum_squares_a - 2 * dot_product + sum_squares_b.T, 0.0)
+    return np.sqrt(squared)
+
+
+def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k: int) -> np.ndarray:
+    """Predict a label for each test sample by majority vote of its k nearest neighbours.
+
+    Args:
+        dists: distance matrix of shape (n_test, n_train); entry (i, j) is the
+            distance between test sample i and training sample j.
+            NOTE(review): assumes test samples lie along axis 0 — confirm
+            against how distance_matrix is called by the training script.
+        labels_train: non-negative integer labels of the training samples,
+            shape (n_train,).
+        k: number of neighbours taking part in the vote.
+
+    Returns:
+        Array of shape (n_test,) holding the predicted label of each sample.
+    """
+    labels_train = np.asarray(labels_train).ravel()
+    # Column indices of the k smallest distances in each row.
+    nearest = np.argsort(dists, axis=1)[:, :k]
+    predictions = np.empty(dists.shape[0], dtype=labels_train.dtype)
+    for i, neighbour_idx in enumerate(nearest):
+        votes = np.bincount(labels_train[neighbour_idx])
+        predictions[i] = np.argmax(votes)
+    return predictions
+
+
+if __name__ == "__main__":
+    A = np.ones((3, 3))
+    B = np.ones((3, 3)) * 2
+    dist = distance_matrix(A, B)
diff --git a/read_cifar.py b/read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d1e7800d34c2962bc23fb25349e61409eac830a
--- /dev/null
+++ b/read_cifar.py
@@ -0,0 +1,78 @@
+import numpy as np
+import pickle
+
+
+def read_cifar_batch(path_of_batch: str):
+    """Load one CIFAR-10 batch file from data/cifar-10-batches-py/.
+
+    Args:
+        path_of_batch: file name of the batch relative to the
+            data/cifar-10-batches-py/ directory (e.g. "data_batch_1").
+
+    Returns:
+        data: float32 array of shape (batch_size, 3072).
+        labels: int64 array of shape (batch_size,).
+    """
+    with open("data/cifar-10-batches-py/" + path_of_batch, 'rb') as fo:
+        # pickle is acceptable here only because the batch files are trusted
+        # local CIFAR-10 data — never unpickle untrusted input.
+        batch = pickle.load(fo, encoding='bytes')
+    data = np.array(batch[b'data'], dtype=np.float32)
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+    return data, labels
+
+
+def read_cifar():
+    """Load the whole CIFAR-10 dataset (5 training batches + the test batch).
+
+    Returns:
+        data: float32 array of shape (60000, 3072).
+        labels: int64 array of shape (60000, 1).
+    """
+    directory = "data/cifar-10-batches-py/"
+    data = []
+    labels = []
+    # The five training batches (fix: the original loop stopped after 3).
+    for i in range(5):
+        with open(directory + 'data_batch_' + str(i + 1), 'rb') as fo:
+            batch = pickle.load(fo, encoding='bytes')
+        data.append(batch[b'data'])
+        labels.append(batch[b'labels'])
+    # ...plus the held-out test batch.
+    with open(directory + 'test_batch', 'rb') as fo:
+        batch = pickle.load(fo, encoding='bytes')
+    data.append(batch[b'data'])
+    labels.append(batch[b'labels'])
+    data = np.array(data, np.float32)
+    labels = np.array(labels, np.int64)
+    # Collapse the per-batch axis into one (n_samples, 3072) matrix and keep
+    # labels as a (n_samples, 1) column, as callers expect.
+    n_batches, batch_size, n_features = data.shape
+    return (np.reshape(data, (n_batches * batch_size, n_features)),
+            np.reshape(labels, (n_batches * batch_size, 1)))
+
+
+def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):
+    """Shuffle the dataset and split it into a train part and a test part.
+
+    Args:
+        data: array of shape (n_samples, n_features).
+        labels: array with n_samples rows, kept aligned with data.
+        split: fraction (in [0, 1]) of the samples assigned to the train part.
+
+    Returns:
+        data_train, labels_train, data_test, labels_test.
+    """
+    n_samples = np.size(data, 0)
+    cut = int(n_samples * split)
+    # One shared permutation keeps every sample aligned with its label.
+    shuffled_index = np.random.permutation(n_samples)
+    shuffled_data = data[shuffled_index]
+    shuffled_labels = labels[shuffled_index]
+    return (shuffled_data[:cut], shuffled_labels[:cut],
+            shuffled_data[cut:], shuffled_labels[cut:])
+
+
+if __name__ == "__main__":
+    data, labels = read_cifar()
+    a, b, c, d = split_dataset(data, labels, 0.8)
+    print(1)