diff --git a/.gitignore b/.gitignore
index d44086f2550ddf318e30c29d156b10bee1d43c02..43348139a1cbb20bc4cfc8b2c9cc6f861d9a2616 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /data
+/.pytest_cache
 /__pycache__
-
+/tests/__pycache__
diff --git "a/Question 1 \303\240 9 ANN.txt" "b/Question 1 \303\240 9 ANN.txt"
new file mode 100644
index 0000000000000000000000000000000000000000..54a92be0b87905a2d717ca9772c3af7c59299e3a
--- /dev/null
+++ "b/Question 1 \303\240 9 ANN.txt"	
@@ -0,0 +1,30 @@
+2. Since C = (a2 - Y)^2, we have dC/da2 = 2(a2 - Y)
+
+3. By the chain rule, dC/dz2 = dC/da2 * da2/dz2
+
+    Since z2 = a1*w2 + b2 and a2 = sigmoid(z2), da2/dz2 = sigmoid(z2) * (1 - sigmoid(z2)) = a2(1 - a2)
+
+    Therefore dC/dz2 = dC/da2 * a2(1 - a2)
+
+4. dC/dw2 = dC/dz2 * dz2/dw2
+
+    Since z2 = a1*w2 + b2, dz2/dw2 = a1
+
+    Therefore dC/dw2 = dC/dz2 * a1
+
+5. dC/db2 = dz2/db2 * dC/dz2
+
+    Since dz2/db2 = 1,
+
+    dC/db2 = dC/dz2
+
+6. dC/da1 = dC/dz2 * dz2/da1 = dC/dz2 * w2
+
+7. dC/dz1 = dC/da1 * da1/dz1 = dC/da1 * a1(1 - a1)
+
+8. dC/dw1 = dC/dz1 * dz1/dw1 = dC/dz1 * a0
+
+9. dC/db1 = dC/dz1 * dz1/db1 = dC/dz1 * 1 = dC/dz1
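+
+Putting steps 2 to 9 together in the matrix form used in mlp.py (with * the element-wise
+product, @ the matrix product and .T the transpose):
+
+    dC/dw2 = a1.T @ (2(a2 - Y) * a2(1 - a2))
+    dC/dw1 = a0.T @ (((2(a2 - Y) * a2(1 - a2)) @ w2.T) * a1(1 - a1))
+
+dC/db2 and dC/db1 are the same expressions without the leading a1.T @ / a0.T @,
+summed over the batch.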
+    
\ No newline at end of file
diff --git a/cifar10.png b/cifar10.png
new file mode 100644
index 0000000000000000000000000000000000000000..c4c90bc73d25ff75c00f04da2de87f7dff612774
Binary files /dev/null and b/cifar10.png differ
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bc529546eb4b0b33da184569f960c1b8d4638d1
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,556 @@
+"""
+
+2. Since C = (a2 - Y)^2, we have dC/da2 = 2(a2 - Y)
+
+3. By the chain rule, dC/dz2 = dC/da2 * da2/dz2
+
+    Since z2 = a1*w2 + b2 and a2 = sigmoid(z2), da2/dz2 = sigmoid(z2) * (1 - sigmoid(z2)) = a2(1 - a2)
+
+    Therefore dC/dz2 = dC/da2 * a2(1 - a2)
+
+4. dC/dw2 = dC/dz2 * dz2/dw2
+
+    Since z2 = a1*w2 + b2, dz2/dw2 = a1
+
+    Therefore dC/dw2 = dC/dz2 * a1
+
+5. dC/db2 = dz2/db2 * dC/dz2
+
+    Since dz2/db2 = 1,
+
+    dC/db2 = dC/dz2
+
+6. dC/da1 = dC/dz2 * dz2/da1 = dC/dz2 * w2
+
+7. dC/dz1 = dC/da1 * da1/dz1 = dC/da1 * a1(1 - a1)
+
+8. dC/dw1 = dC/dz1 * dz1/dw1 = dC/dz1 * a0
+
+9. dC/db1 = dC/dz1 * dz1/db1 = dC/dz1 * 1 = dC/dz1
+    
+
+"""
+
+
+# 10. learn_once_mse function
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from read_cifar import read_cifar, read_cifar_batch, split_dataset
+
+path = "data/cifar-10-batches-py/"
+
+
+def learn_once_mse(w1, w2, b1, b2, data, targets, learning_rate=0.00001):
+
+    """
+    perform one gradient descent step
+
+    :param
+       w1: weight of the network
+       w2: weight of the network
+       b1: biases of the network
+       b2: biases of the network
+       data: the data input of the network
+       targets: the desired output
+       learning_rate: the factor that changes the weights
+
+    :return:
+       w1: updated weight of the network
+       w2: updated weight of the network
+       b1: updated biases of the network
+       b2: updated biases of the network
+       loss: the loss that separates us from the desired output
+    """
+
+    # Forward pass
+
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # layer 2
+
+    # we have dC/dw2 = dz2/dw2 * da2/dz2 * dC/da2
+    # we have dC/db2 = dz2/db2 * da2/dz2 * dC/da2
+
+    var_z2_w2 = a1  # dz2/dw2
+    var_z2_b2 = np.ones((1, data.shape[0]), dtype=float)  # dz2/db2
+
+    var_a2_z2 = a2 * (1 - a2)  # da2/dz2  sigmoid(z2) * (1 - sigmoid(z2))
+
+    var_loss_a2 = 2 * (a2 - targets)  # dC/da2
+
+    var_loss_w2 = np.dot(var_z2_w2.T, var_a2_z2 * var_loss_a2)  # dC/dw2
+    var_loss_b2 = np.dot(var_z2_b2, var_a2_z2 * var_loss_a2)  # dC/db2
+
+    # layer 1
+
+    # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1
+    # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1
+
+    var_z1_w1 = a0  # dz1/dw1
+    var_z1_b1 = np.ones((1, data.shape[0]), dtype=float)  # dz1/db1
+
+    var_a1_z1 = a1 * (1 - a1)  # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
+
+    var_z2_a1 = w2  # dz2/da1
+    var_loss_a1 = np.dot(
+        var_loss_a2 * var_a2_z2, var_z2_a1.T
+    )  # dC/da1 = dC/da2 * da2/dz2 * dz2/da1
+
+    var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
+    var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
+
+    # updated weights of the network
+
+    w1 = w1 - learning_rate * var_loss_w1
+    w2 = w2 - learning_rate * var_loss_w2
+
+    # updated biases of the network
+
+    b1 = b1 - learning_rate * var_loss_b1
+    b2 = b2 - learning_rate * var_loss_b2
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+    print(loss)
+
+    return w1, w2, b1, b2, loss
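+
+
+# A minimal sketch of a finite-difference check for the dC/dw2 formula derived
+# above (illustrative only: _check_grad_w2 is a hypothetical helper, not used
+# anywhere else in this project). It compares the analytic gradient
+# a1.T @ (2*(a2 - targets) * a2 * (1 - a2)) with a numerical slope of the loss.
+def _check_grad_w2(w1, w2, b1, b2, data, targets, i=0, j=0, eps=1e-6):
+
+    def mean_squared_error(w2_candidate):
+        a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))
+        a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2_candidate) + b2)))
+        return np.mean(np.square(a2 - targets))
+
+    # analytic gradient of the *summed* squared error, as computed in learn_once_mse
+    a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))
+    a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))
+    analytic = np.dot(a1.T, 2 * (a2 - targets) * a2 * (1 - a2))
+
+    # central-difference estimate of d(mean loss)/d(w2[i, j])
+    w2_plus, w2_minus = w2.copy(), w2.copy()
+    w2_plus[i, j] += eps
+    w2_minus[i, j] -= eps
+    numerical = (mean_squared_error(w2_plus) - mean_squared_error(w2_minus)) / (2 * eps)
+
+    # learn_once_mse sums the squared errors in its gradient but averages them in
+    # its reported loss, so the analytic entry is rescaled by targets.size here
+    return analytic[i, j] / targets.size, numerical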
+
+
+# 11. one_hot function
+
+
+def one_hot(array):
+
+    """
+    compute the one_hot_encoding of a 1D array
+
+    :param
+        array: a 1D array
+
+    :retrun:
+        one_hot_encoded_array: the corresponding one_hot matrix
+
+    """
+
+    one_hot_encoded_array = np.zeros((array.size, array.max() + 1))
+    one_hot_encoded_array[np.arange(array.size), array] = 1
+
+    return one_hot_encoded_array
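+
+
+# For example (a quick illustration; the same kind of call also appears in the
+# __main__ block below):
+#     one_hot(np.array([0, 2, 1]))
+#     -> array([[1., 0., 0.],
+#               [0., 0., 1.],
+#               [0., 1., 0.]])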
+
+
+# 12. learn_once_cross_entropy
+
+
+def learn_once_cross_entropy(
+    w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001
+):
+
+    """
+    perform one gradient descent step using a binary cross_entropy loss
+
+    :param
+       w1: weight of the network
+       w2: weight of the network
+       b1: biases of the network
+       b2: biases of the network
+       data_train: the train set data which is the input of the network
+       labels_train: the desired output will play the rule of target
+       learning_rate: the factor that changes the weights
+
+    :return:
+       w1: updated weight of the network
+       w2: updated weight of the network
+       b1: updated biases of the network
+       b2: updated biases of the network
+       loss: the loss that separates us from the desired output
+    """
+
+    # we admit that dC/dz2 = a2 - Y where Y = one_hot(labels_train)
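+    #
+    # Why this holds for a sigmoid output trained with the binary cross-entropy
+    # loss C = -sum(Y*log(a2) + (1 - Y)*log(1 - a2)):
+    #   dC/da2  = (a2 - Y) / (a2 * (1 - a2))
+    #   da2/dz2 = a2 * (1 - a2)              (derivative of the sigmoid)
+    #   dC/dz2  = dC/da2 * da2/dz2 = a2 - Y
+    # (A softmax output with the categorical cross-entropy loss gives the same
+    # expression.)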
+
+    # Forward pass
+    a0 = data_train  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # layer 2
+
+    # we have dC/dw2 = dz2/dw2 * dC/dz2
+    # we have dC/db2 = dz2/db2 * dC/dz2
+
+    var_z2_w2 = a1  # dz2/dw2
+    var_z2_b2 = np.ones((1, data_train.shape[0]), dtype=float)  # dz2/db2
+
+    var_loss_z2 = a2 - one_hot(labels_train)  # dC/dz2
+
+    var_loss_w2 = np.dot(var_z2_w2.T, var_loss_z2)  # dC/dw2
+    var_loss_b2 = np.dot(var_z2_b2, var_loss_z2)  # dC/db2
+
+    # layer 1
+
+    # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1 = dz1/dw1 * da1/dz1 * dC/dz2 * dz2/da1
+    # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1 = dz1/db1 * da1/dz1 * dC/dz2 * dz2/da1
+
+    var_z1_w1 = a0  # dz1/dw1
+    var_z1_b1 = np.ones((1, data_train.shape[0]), dtype=float)  # dz1/db1
+
+    var_a1_z1 = a1 * (1 - a1)  # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
+
+    var_z2_a1 = w2  # dz2/da1
+    var_loss_a1 = np.dot(var_loss_z2, var_z2_a1.T)  # dC/da1 = dC/dz2 * dz2/da1
+
+    var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
+    var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
+
+    # updated weights of the network
+
+    w1 = w1 - learning_rate * var_loss_w1
+    w2 = w2 - learning_rate * var_loss_w2
+
+    # updated biases of the network
+
+    b1 = b1 - learning_rate * var_loss_b1
+    b2 = b2 - learning_rate * var_loss_b2
+
+    # Forward pass
+    a0 = data_train  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - one_hot(labels_train)))
+    print(loss)
+
+    return w1, w2, b1, b2, loss
+
+
+# 13. train_mlp function
+
+# first we write a function that will compute the accuracy
+def compute_accuracy(target, prediction):
+    correct = 0
+    for i in range(target.shape[0]):
+        if np.argmax(target[i]) == np.argmax(prediction[i]):
+            correct += 1
+    precision = correct / target.shape[0]
+    return precision
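+
+
+# For example, with one-hot targets [[1, 0], [0, 1]] and predictions
+# [[0.8, 0.2], [0.7, 0.3]], only the first row's argmax matches, so
+# compute_accuracy returns 0.5.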
+
+
+def train_mlp(
+    w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001, num_epoch=3
+):
+
+    """
+    perform gradient descent step using a binary cross_entropy loss looping on the num_epoch
+    this function will train our model so that the weights and biases are well updated
+
+    :param
+       w1: weight of the network
+       w2: weight of the network
+       b1: biases of the network
+       b2: biases of the network
+       data_train: the train set data which is the input of the network
+       labels_train: the desired output will play the rule of target
+       learning_rate: the factor that changes the weights
+       num_epoch: number of time to reapeat the gradient descent
+
+    :return:
+       w1: updated weight of the network
+       w2: updated weight of the network
+       b1: updated biases of the network
+       b2: updated biases of the network
+       train_accuracies: the list of training accuracy across num_epoch
+       losses: the list of loss that separates us from the desired output across the num_epoch
+    """
+
+    # Forward pass
+    a0 = data_train  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    train_accuracies = []
+    losses = []
+
+    for i in range(num_epoch):
+
+        # layer 2
+
+        # we have dC/dw2 = dz2/dw2 * dC/dz2
+        # we have dC/db2 = dz2/db2 * dC/dz2
+
+        var_z2_w2 = a1  # dz2/dw2
+        var_z2_b2 = np.ones((1, data_train.shape[0]), dtype=float)  # dz2/db2
+
+        var_loss_z2 = a2 - one_hot(labels_train)  # dC/dz2
+
+        var_loss_w2 = np.dot(var_z2_w2.T, var_loss_z2)  # dC/dw2
+        var_loss_b2 = np.dot(var_z2_b2, var_loss_z2)  # dC/db2
+
+        # layer 1
+
+        # we have dC/dw1 = dz1/dw1 * da1/dz1 * dC/da1 = dz1/dw1 * da1/dz1 * dC/dz2 * dz2/da1
+        # we have dC/db1 = dz1/db1 * da1/dz1 * dC/da1 = dz1/db1 * da1/dz1 * dC/dz2 * dz2/da1
+
+        var_z1_w1 = a0  # dz1/dw1
+        var_z1_b1 = np.ones((1, data_train.shape[0]), dtype=float)  # dz1/db1
+
+        var_a1_z1 = a1 * (
+            1 - a1
+        )  # da1/dz1 = sigmoid(z1) * (1 - sigmoid(z1)) = a1*(1-a1)
+
+        var_z2_a1 = w2  # dz2/da1
+        var_loss_a1 = np.dot(var_loss_z2, var_z2_a1.T)  # dC/da1 = dC/dz2 * dz2/da1
+
+        var_loss_w1 = np.dot(var_z1_w1.T, var_a1_z1 * var_loss_a1)
+        var_loss_b1 = np.dot(var_z1_b1, var_a1_z1 * var_loss_a1)
+
+        # updated weights of the network
+
+        w1 = w1 - learning_rate * var_loss_w1
+        w2 = w2 - learning_rate * var_loss_w2
+
+        # updated biases of the network
+
+        b1 = b1 - learning_rate * var_loss_b1
+        b2 = b2 - learning_rate * var_loss_b2
+
+        # Forward pass
+        a0 = data_train  # the data are the input of the first layer
+        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+        a1 = 1 / (
+            1 + np.exp(-z1)
+        )  # output of the hidden layer (sigmoid activation function)
+        z2 = np.matmul(a1, w2) + b2  # input of the output layer
+        a2 = 1 / (
+            1 + np.exp(-z2)
+        )  # output of the output layer (sigmoid activation function)
+        predictions = a2  # the predicted values are the outputs of the output layer
+
+        # Compute loss (MSE)
+        loss = np.mean(np.square(predictions - one_hot(labels_train)))
+        losses.append(loss)
+
+        # Compute accuracy
+        acc = compute_accuracy(one_hot(labels_train), predictions)
+        train_accuracies.append(acc)
+
+    return w1, w2, b1, b2, train_accuracies, losses
+
+
+# 14. test_mlp function
+
+
+def test_mlp(w1, w2, b1, b2, data_test, labels_test):
+
+    """
+    the function perform a forward pass to test our trained model accuracy
+
+    :param
+        w1: weight of the network
+        w2: weight of the network
+        b1: biases of the network
+        b2: biases of the network
+        data_test: the test set data
+        labels_test: the corresponding labels for the data_test with whom we'll compare the predictions
+
+    :return:
+        test_accuracy: the testing accuracy
+
+    """
+
+    # Forward pass
+    a0 = data_test  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute accuracy
+    test_accuracy = compute_accuracy(one_hot(labels_test), predictions)
+
+    return test_accuracy
+
+
+# 15. run_mlp_training function
+
+
+def run_mlp_training(
+    data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch
+):
+
+    """
+    train an MLP classifier compute taining accuracies across num_epoch and the testing_accuracy
+
+    :param
+        data_train: the training data
+        labels_train: the corresponding labels of the training data
+        data_test: the test data
+        labels_test: the corresponding labels of the test data
+        d_h: number of neurons in the hidden layer
+        learning_rate: the factor that changes the weights
+        num_epoch: number of time to reapeat the gradient descent
+
+    :return:
+
+        test_accuracy: the testing accuracy
+        train_accuracies: the list of training accuracy across num_epoch
+        losses: the list of loss that separates us from the desired output across the num_epoch
+
+
+    """
+
+    d_in = data_train.shape[1]
+    d_out = 10
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    # Forward pass
+    a0 = data_train  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    w1, w2, b1, b2, train_accuracies, losses = train_mlp(
+        w1, w2, b1, b2, data_train, labels_train, learning_rate, num_epoch
+    )
+    test_accuracy = test_mlp(w1, w2, b1, b2, data_test, labels_test)
+
+    return train_accuracies, test_accuracy, losses
+
+
+if __name__ == "__main__":
+
+    N = 30  # number of input samples
+    d_in = 3  # input dimension
+    d_h = 3  # number of neurons in the hidden layer
+    d_out = 2  # output dimension (number of neurons of the output layer)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    data = np.random.rand(N, d_in)  # create random input data
+    targets = np.random.rand(N, d_out)  # create random targets
+
+    # test of learn_once_mse
+    w1, w2, b1, b2, loss = learn_once_mse(
+        w1, w2, b1, b2, data, targets, learning_rate=0.00001
+    )
+
+    # test of one_hot
+    print(one_hot(np.array([1, 2, 5])))
+
+    # test of learn_once_cross_entropy
+    data, labels = read_cifar_batch(path + "data_batch_1")
+    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.2)
+
+    d_in = data_train.shape[1]  # input dimension
+    d_h = 10  # number of neurons in the hidden layer
+    d_out = 10  # output dimension (number of neurons of the output layer)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    w1, w2, b1, b2, loss = learn_once_cross_entropy(
+        w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001
+    )
+
+    # test of train_mlp
+    w1, w2, b1, b2, train_accuracies, losses = train_mlp(
+        w1, w2, b1, b2, data_train, labels_train, learning_rate=0.00001, num_epoch=100
+    )
+
+    # test of test_mlp
+    test_accuracy = test_mlp(w1, w2, b1, b2, data_test, labels_test)
+    print(test_accuracy)
+
+    # test of run_mlp_training
+    data, labels = read_cifar(path)
+    data_train, data_test, labels_train, labels_test = split_dataset(data, labels, 0.9)
+
+    train_accuracies, test_accuracy, losses = run_mlp_training(
+        data_train,
+        labels_train,
+        data_test,
+        labels_test,
+        d_h=64,
+        learning_rate=0.00001,
+        num_epoch=100,
+    )
+
+    print(test_accuracy)
+
+    # plt.plot(losses)
+    # plt.title("losses")
+    # plt.savefig("mlp_loss.png", bbox_inches="tight")
+    # plt.show()
+
+    plt.plot(train_accuracies)
+    plt.title("Accuracies")
+    plt.savefig("mlp.png", bbox_inches="tight")
+    plt.show()
diff --git a/results/knn.png b/results/knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..42b69ce5c6f5859ebf739dc56b744c92530ab3f9
Binary files /dev/null and b/results/knn.png differ
diff --git a/results/mlp.png b/results/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..68ca10d1cd6aa1a1f5a34982e9cab40432a13d9f
Binary files /dev/null and b/results/mlp.png differ
diff --git a/results/mlp_loss.png b/results/mlp_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..e81bd776d4f303e838b4b041587e92cef67ea894
Binary files /dev/null and b/results/mlp_loss.png differ
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5098604104459e52c4f790a270c1b81fafd4651
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,3 @@
+from .knn_test import *
+from .mlp_test import *
+from .read_cifar_test import *
diff --git a/tests/knn_test.py b/tests/knn_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..54fb46582220654bc4ff63bcefef8990973b78e8
--- /dev/null
+++ b/tests/knn_test.py
@@ -0,0 +1,114 @@
+import numpy as np
+
+from knn import *
+from read_cifar import *
+
+
+# testing the correct execution of equal_shape_distance_matrix
+def test_equal_shape_distance_matrix():
+
+    X = np.array([[1, 2], [0, 3]])
+    V = np.array([[3, 2], [1, 4]])
+
+    expected_dist = np.array([[4, -1], [5, 2]])  # X*X.T + V*V.T - 2*X*V.T
+
+    dist = equal_shape_distance_matrix(X, V)
+    assert np.all(dist == expected_dist)
+
+
+# testing the correct execution of distance_matrix
+def test_distance_matrix():
+
+    data_train = np.array([[1, 2], [0, 3], [3, 2], [1, 4]])
+    data_test = np.array([[0, 2], [1, 3]])
+
+    dist = distance_matrix(data_train, data_test)
+
+    first_dist = equal_shape_distance_matrix(
+        data_train[: data_test.shape[0]], data_test
+    )
+    second_dist = equal_shape_distance_matrix(
+        data_train[data_test.shape[0] : data_test.shape[0] * 2], data_test
+    )
+
+    expected_dist = np.concatenate((first_dist, second_dist), axis=1)
+
+    assert np.all(dist == expected_dist)
+
+
+# test the correct execution of knn_predict
+def test_knn_predict():
+
+    data, labels = read_cifar(path)
+
+    data_train, data_test, labels_train, labels_test = split_dataset(
+        data, labels, split=0.2
+    )
+
+    assert data_train.shape[0] == 48000
+
+    num_test = 2000
+    mask = list(range(num_test))
+    data_test = data_test[mask]
+    labels_test = labels_test[mask]
+
+    num_train = 2000
+    mask = list(range(num_train))
+    data_train = data_train[mask]
+    labels_train = labels_train[mask]
+
+    dists = distance_matrix(data_train, data_test)
+    predicted_label = knn_predict(labels_train, dists, k=3)
+
+    assert predicted_label.shape[0] == labels_test.shape[0]
+
+
+# test the correct execution of evaluate_knn
+def test_evaluate_knn():
+
+    data, labels = read_cifar(path)
+
+    data_train, data_test, labels_train, labels_test = split_dataset(
+        data, labels, split=0.2
+    )
+
+    assert data_train.shape[0] == 48000
+
+    num_test = 2000
+    mask = list(range(num_test))
+    data_test = data_test[mask]
+    labels_test = labels_test[mask]
+
+    num_train = 2000
+    mask = list(range(num_train))
+    data_train = data_train[mask]
+    labels_train = labels_train[mask]
+
+    dists = distance_matrix(data_train, data_test)
+    y_test_pred = knn_predict(labels_train, dists, k=3)
+
+    # total number of predictions
+    num_test = dists.shape[0]
+    # number of correct predictions
+    correct = np.sum(y_test_pred == labels_test)
+    # accuracy
+    predicted_accuracy = float(correct) / num_test
+
+    accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k=3)
+
+    assert predicted_accuracy == accuracy
+
diff --git a/tests/mlp_test.py b/tests/mlp_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c11a8d53e05f43869da88c4f2d638df659451e59
--- /dev/null
+++ b/tests/mlp_test.py
@@ -0,0 +1,18 @@
+import numpy as np
+
+from mlp import one_hot
+
+# test the correct execution of one_hot
+
+
+def test_one_hot():
+
+    X = np.array([1, 2, 5])
+
+    expected_one_hot_encode = np.array(
+        [[0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1]]
+    )
+
+    one_hot_encode = one_hot(X)
+
+    assert np.all(one_hot_encode == expected_one_hot_encode)
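+
+
+# An additional property check (a small sketch, following the same pytest-style
+# convention as the test above): every row of a one-hot matrix contains exactly
+# one 1, and the matrix has one column per possible label.
+def test_one_hot_properties():
+
+    X = np.array([0, 3, 1, 3])
+
+    one_hot_encode = one_hot(X)
+
+    # shape is (number of labels, max label + 1)
+    assert one_hot_encode.shape == (4, 4)
+    # each row sums to exactly 1
+    assert np.all(one_hot_encode.sum(axis=1) == 1)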
diff --git a/tests/read_cifar_test.py b/tests/read_cifar_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..9601718f3209268e5910f83eb953699c604f3b06
--- /dev/null
+++ b/tests/read_cifar_test.py
@@ -0,0 +1,26 @@
+from read_cifar import *
+
+
+# testing the correct execution of read_cifar_batch()
+def test_read_cifar_batch():
+
+    data, labels = read_cifar_batch(path + "data_batch_1")
+    assert data.shape[0] == 10000
+
+
+# testing the correct execution of read_cifar()
+def test_read_cifar():
+
+    data, labels = read_cifar(path)
+    assert data.shape[0] == 60000
+
+
+# testing the correct execution of split_dataset
+def test_split_dataset():
+    data, labels = read_cifar(path)
+    assert data.shape[0] == 60000
+    data_train, data_test, labels_train, labels_test = split_dataset(
+        data, labels, split=0.2
+    )
+    assert data_train.shape[0] == 48000
+    assert data_test.shape[0] == 12000