diff --git a/.vscode/settings.json b/.vscode/settings.json
index 56c56a008f2e2c16e1ddfe27a1a1e819189caee0..5e9d33d4b09a2691cfd56d9c0dc6c0a1c60a472c 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,8 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "off"
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
 }
\ No newline at end of file
diff --git a/Artificial Neural Network.ipynb b/Artificial Neural Network.ipynb
deleted file mode 100644
index 1f366ba834ce2b8ef4965f4ec95347b45baca31c..0000000000000000000000000000000000000000
--- a/Artificial Neural Network.ipynb	
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import read_cifar\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/LICENSE b/LICENSE
index 0c040bcb1c8109eaa3b67e4f0776cb94e3fd444b..ccec6e3eb803db2e55bf028616ea76b3573f6f99 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Quentin GALLOUÉDEC
+Copyright (c) 2023 Matthieu Massardier
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/LICENSE-tutorial.md b/LICENSE-tutorial.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c040bcb1c8109eaa3b67e4f0776cb94e3fd444b
--- /dev/null
+++ b/LICENSE-tutorial.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Quentin GALLOUÉDEC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index a0ac53bfe5f9400196d065f768c29eb5613c7ec4..1f8b8418f004a0b9b91f48adcaa0d3d4b05edd97 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 
 ## Setup
 
-1. Download the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
-1. (*optionnal*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
-1. Install the requirements using `pip install -r requirements.txt`.
+1. Download and extract the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
+2. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
+3. Install the requirements using `pip install -r requirements.txt`.
 
 ## Usage
 
@@ -13,12 +13,12 @@ Otherwise here is a test result: ![knn test result](./results/knn.png)
 
 ## Some proofs
 
-### *1. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
+### *1. Prove that $\sigma' = \sigma \times (1-\sigma)$*
 
 To prove that, let's first derive the sigmoid function:  
-$\sigma(x) = \frac{1}{1+e^{-x}}$  
-so $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$  
-$\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$  
-Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$  
-And $\frac{e^{-x}}{1+e^{-x}} = 1 - \frac{1}{1+e^{x}}$  
-So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
\ No newline at end of file
+$\sigma(x) = \frac{1}{1+e^{-x}}$  
+so $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$  
+$\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$  
+Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$  
+And $\frac{e^{-x}}{1+e^{-x}} = 1 - \frac{1}{1+e^{-x}}$  
+So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
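+
+As a quick numerical check of this identity (a minimal sketch, assuming only NumPy), we can compare a centered finite-difference estimate of $\sigma'$ against $\sigma \times (1-\sigma)$:
+
+```python
+import numpy as np
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+x = np.linspace(-5, 5, 1001)
+h = 1e-6  # step for the centered difference
+numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
+analytic = sigmoid(x) * (1 - sigmoid(x))
+print(np.max(np.abs(numeric - analytic)))  # ~1e-10, so the identity holds
+```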
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f3546bc2cc0cd70caa5e924eb97cf5c09ea34b8
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,147 @@
+import numpy as np
+
+
+def learn_once_mse(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, target: np.array, learning_rate: float):
+    """
+    Performs one learning step of the MLP with MSE loss
+
+    Arguments:
+        w1 -- weights of the hidden layer
+        b1 -- biases of the hidden layer
+        w2 -- weights of the output layer
+        b2 -- biases of the output layer
+        data -- input data
+        target -- target values
+        learning_rate -- learning rate
+    Returns:
+        w1 -- updated weights of the hidden layer
+        b1 -- updated biases of the hidden layer
+        w2 -- updated weights of the output layer
+        b2 -- updated biases of the output layer
+        loss -- loss of the forward pass
+    """
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    # output of the hidden layer (sigmoid activation function)
+    a1 = 1 / (1 + np.exp(-z1))
+
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # output of the output layer (sigmoid activation function)
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - target))
+
+    # Backward pass
+    # derivative of the loss with respect to the output of the output layer
+    # (the mean runs over all N * d_out entries, hence the normalization)
+    dC_dA2 = 2 / predictions.size * (predictions - target)
+    # derivative of the loss with respect to the input of the output layer
+    dC_dZ2 = dC_dA2 * (1 - predictions) * predictions
+    # derivative of the loss with respect to the weights of the output layer
+    dC_dW2 = np.matmul(a1.T, dC_dZ2)
+    # derivative of the loss with respect to the biases of the output layer
+    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
+
+    # derivative of the loss with respect to the output of the hidden layer
+    dC_dA1 = np.matmul(dC_dZ2, w2.T)
+    # derivative of the loss with respect to the input of the hidden layer
+    dC_dZ1 = dC_dA1 * (1 - a1) * a1
+    # derivative of the loss with respect to the weights of the hidden layer
+    dC_dW1 = np.matmul(a0.T, dC_dZ1)
+    # derivative of the loss with respect to the biases of the hidden layer
+    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dW1
+    b1 -= learning_rate * dC_dB1
+    w2 -= learning_rate * dC_dW2
+    b2 -= learning_rate * dC_dB2
+
+    return w1, b1, w2, b2, loss
+
+
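+# Finite-difference sanity check for the MSE backward pass above. This is a
+# small debugging sketch of ours (the helper name is illustrative, not part of
+# the assignment): perturb one entry of w2, recompute the loss, and compare the
+# numerical slope with the analytic dC_dW2[0, 0] from learn_once_mse.
+def _numeric_grad_w2_mse(w1, b1, w2, b2, data, target, eps=1e-6):
+    def loss_of(w2_):
+        a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))
+        a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2_) + b2)))
+        return np.mean(np.square(a2 - target))
+
+    w2_plus, w2_minus = w2.copy(), w2.copy()
+    w2_plus[0, 0] += eps
+    w2_minus[0, 0] -= eps
+    # centered difference approximates d(loss)/d(w2[0, 0])
+    return (loss_of(w2_plus) - loss_of(w2_minus)) / (2 * eps)
+
+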
+def one_hot(labels: np.array):
+    """
+    Converts a vector of labels into a one-hot matrix
+
+    Arguments:
+        labels -- vector of labels
+    Returns:
+        one_hot_matrix -- one-hot matrix
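+
+    Example (illustrative):
+        one_hot(np.array([1, 0, 2])) returns
+        [[0., 1., 0.],
+         [1., 0., 0.],
+         [0., 0., 1.]]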
+    """
+    one_hot_matrix = np.zeros((len(labels), np.max(labels) + 1))
+    one_hot_matrix[np.arange(len(labels)), labels] = 1
+    return one_hot_matrix
+
+
+def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, labels_train: np.array, learning_rate: float):
+    """
+    Performs one learning step of the MLP with cross-entropy loss
+
+    Arguments:
+        w1 -- weights of the hidden layer
+        b1 -- biases of the hidden layer
+        w2 -- weights of the output layer
+        b2 -- biases of the output layer
+        data -- input data
+        labels_train -- one-hot encoded labels of the training data
+        learning_rate -- learning rate
+    Returns:
+        w1 -- updated weights of the hidden layer
+        b1 -- updated biases of the hidden layer
+        w2 -- updated weights of the output layer
+        b2 -- updated biases of the output layer
+        loss -- loss of the forward pass
+    """
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    # output of the hidden layer (sigmoid activation function)
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # output of the output layer (sigmoid activation function)
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (cross-entropy)
+    loss = -np.mean(np.sum(labels_train * np.log(predictions) +
+                    (1 - labels_train) * np.log(1 - predictions), axis=1))
+
+    # Backward pass
+    # With a sigmoid output and this cross-entropy loss, the chain rule
+    # simplifies: dC_dA2 = -y/a2 + (1-y)/(1-a2) and dA2_dZ2 = a2*(1-a2),
+    # so their product collapses to (a2 - y), divided by N for the mean.
+    # derivative of the loss with respect to the input of the output layer
+    dC_dZ2 = (predictions - labels_train) / data.shape[0]
+    # derivative of the loss with respect to the weights of the output layer
+    dC_dW2 = np.matmul(a1.T, dC_dZ2)
+    # derivative of the loss with respect to the biases of the output layer
+    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
+
+    # derivative of the loss with respect to the output of the hidden layer
+    dC_dA1 = np.matmul(dC_dZ2, w2.T)
+    # derivative of the loss with respect to the input of the hidden layer
+    dC_dZ1 = dC_dA1 * (1 - a1) * a1
+    # derivative of the loss with respect to the weights of the hidden layer
+    dC_dW1 = np.matmul(a0.T, dC_dZ1)
+    # derivative of the loss with respect to the biases of the hidden layer
+    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dW1
+    b1 -= learning_rate * dC_dB1
+    w2 -= learning_rate * dC_dW2
+    b2 -= learning_rate * dC_dB2
+
+    return w1, b1, w2, b2, loss
+
+
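+# Usage sketch for the cross-entropy step (our illustration; it is not
+# exercised in the demo below). The labels must be one-hot encoded first:
+#   targets_ce = one_hot(labels)
+#   w1, b1, w2, b2, loss = learn_once_cross_entropy(
+#       w1, b1, w2, b2, data, targets_ce, 0.1)
+
+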
+if __name__ == "__main__":
+    N = 30  # number of input data
+    d_in = 3  # input dimension
+    d_h = 3  # number of neurons in the hidden layer
+    d_out = 2  # output dimension (number of neurons of the output layer)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    data = np.random.rand(N, d_in)  # random input data
+    targets = np.random.rand(N, d_out)  # random targets
+
+    for i in range(100):
+        w1, b1, w2, b2, loss = learn_once_mse(
+            w1, b1, w2, b2, data, targets, 0.1)
+        print(loss)
+
+    print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
diff --git a/read_cifar.py b/read_cifar.py
index 29fa7c145be54f05120b80c18c69a487b84c0b40..86d78f2c126a3eebb61bbee7c9c62195002db4a2 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -75,41 +75,6 @@ def split_dataset(data: np.array, labels: np.array, split: float):
     
     return data_train, labels_train, data_test, labels_test
 
-def test_split_dataset():
-    cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
-    split = 0.8
-
-    datapoint_size = cifar_data.shape[1]
-
-    data_train, labels_train, data_test, labels_test = split_dataset(
-        cifar_data, cifar_labels, split
-    )
-
-    expected_train_size = round(split * cifar_data.shape[0])
-    expected_test_size = cifar_data.shape[0] - expected_train_size
-
-    # check dimensions
-    assert data_train.shape == (expected_train_size, datapoint_size)
-    assert labels_train.shape == (expected_train_size,)
-    assert data_test.shape == (expected_test_size, datapoint_size)
-    assert labels_test.shape == (expected_test_size,)
-
-    # check types
-    assert data_train.dtype == cifar_data.dtype
-    assert labels_train.dtype == cifar_labels.dtype
-    assert data_test.dtype == cifar_data.dtype
-    assert labels_test.dtype == cifar_labels.dtype
-
-    # check that the first data and label are still in the dataset and match indices
-    # pick random index in the training set
-    i = np.random.randint(0, expected_train_size)
-    random_data = data_train[i]
-    random_label = labels_train[i]
-    # check that the data and label match in original dataset
-    original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
-    assert cifar_labels[original_index] == random_label
-
-
 if __name__ == "__main__":
     images, labels = read_cifar("data/cifar-10-batches-py/")
     print(images.shape)
@@ -123,5 +88,4 @@ if __name__ == "__main__":
     print(labels_train[0])
     print(data_test[0])
     print(labels_test[0])
-    test_split_dataset()
     
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/test_knn.py b/tests/test_knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f4b8c8d3dd13b71d56fa9db748445512f1f7813
--- /dev/null
+++ b/tests/test_knn.py
@@ -0,0 +1,31 @@
+import numpy as np
+
+from knn import distance_matrix, evaluate_knn, knn_predict
+
+def test_distance_matrix():
+    X = np.array([[1, 0], [0, 0]])
+    Y = np.array([[1, 1], [0, 1]])
+    result = distance_matrix(X, Y)
+    expected = np.array([[1, np.sqrt(2)], [np.sqrt(2), 1]])
+    
+    np.testing.assert_allclose(result, expected)
+
+def test_knn_predict():
+    dist = np.array([[1, 4], [3, 2]])
+    labels_train = np.array([0, 1])
+    k = 1
+    result = knn_predict(dist, labels_train, k)
+    expected = np.array([0, 1])
+    
+    np.testing.assert_array_equal(result, expected)
+    
+def test_evaluate_knn():
+    data_train = np.array([[1, 2], [3, 4]])
+    labels_train = np.array([0, 1])
+    data_test = np.array([[5, 6], [7, 8]])
+    labels_test = np.array([1, 0])
+    k = 1
+    result = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+    
+    assert result == 0.5
\ No newline at end of file
diff --git a/tests/test_mlp.py b/tests/test_mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..662faf04bd31092fa060de0b959e1bf10edf8bd9
--- /dev/null
+++ b/tests/test_mlp.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+from mlp import learn_once_mse, one_hot
+
+
+def test_learn_once_mse():
+    N = 30  # number of input data
+    d_in = 3  # input dimension
+    d_h = 3  # number of neurons in the hidden layer
+    d_out = 2  # output dimension (number of neurons of the output layer)
+
+    # Initialization of the network weights and biases
+    w1 = np.zeros((d_in, d_h))  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = np.zeros((d_h, d_out))  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    data = np.random.rand(N, d_in)  # random input data
+    targets = np.random.rand(N, d_out)  # random targets
+
+    w1, b1, w2, b2, loss = learn_once_mse(
+            w1, b1, w2, b2, data, targets, 0.1)
+    
+    w1, b1, w2, b2, loss2 = learn_once_mse(
+            w1, b1, w2, b2, data, targets, 0.1)
+
+    assert loss2 < loss
+
+def test_one_hot():
+    indices = np.array([2, 0, 1])
+    result = one_hot(indices)
+    expected = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
+    
+    np.testing.assert_array_equal(result, expected)
diff --git a/tests/test_read_cifar.py b/tests/test_read_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cf13a52436a5adfdc0a6879dcb9881ef031a23b
--- /dev/null
+++ b/tests/test_read_cifar.py
@@ -0,0 +1,54 @@
+import numpy as np
+
+from read_cifar import read_cifar, read_cifar_batch, split_dataset
+
+def test_read_cifar_batch():
+    data, labels = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")
+    
+    assert data.shape == (10000, 3072)
+    assert labels.shape == (10000,)
+    assert data.dtype == np.float32
+    assert labels.dtype == np.int64
+
+def test_read_cifar():
+    data, labels = read_cifar("data/cifar-10-batches-py/")
+    
+    assert data.shape == (60000, 3072)
+    assert labels.shape == (60000,)
+    assert data.dtype == np.float32
+    assert labels.dtype == np.int64
+
+def test_split_dataset():
+    cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
+    split = 0.8
+
+    datapoint_size = cifar_data.shape[1]
+
+    data_train, labels_train, data_test, labels_test = split_dataset(
+        cifar_data, cifar_labels, split
+    )
+
+    expected_train_size = round(split * cifar_data.shape[0])
+    expected_test_size = cifar_data.shape[0] - expected_train_size
+
+    # check dimensions
+    assert data_train.shape == (expected_train_size, datapoint_size)
+    assert labels_train.shape == (expected_train_size,)
+    assert data_test.shape == (expected_test_size, datapoint_size)
+    assert labels_test.shape == (expected_test_size,)
+
+    # check types
+    assert data_train.dtype == cifar_data.dtype
+    assert labels_train.dtype == cifar_labels.dtype
+    assert data_test.dtype == cifar_data.dtype
+    assert labels_test.dtype == cifar_labels.dtype
+
+    # check that the first data and label are still in the dataset and match indices
+    # pick random index in the training set
+    i = np.random.randint(0, expected_train_size)
+    random_data = data_train[i]
+    random_label = labels_train[i]
+    # check that the data and label match in original dataset
+    original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
+    assert cifar_labels[original_index] == random_label