Unverified commit a95e44f9 authored by Jangberry (Nomad-Debian)

Up to Q13

parent 6e2e532b
{
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "off"
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
%% Cell type:code id: tags:
``` python
import read_cifar
import numpy as np
```
%% Cell type:markdown id: tags:
MIT License
Copyright (c) 2022 Quentin GALLOUÉDEC
Copyright (c) 2023 Matthieu Massardier
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -2,9 +2,9 @@
## Setup
1. Download and extract the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
2. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
3. Install the requirements using `pip install -r requirements.txt`.
## Usage
@@ -13,12 +13,12 @@ Otherwise here is a test result: ![knn test result](./results/knn.png)
## Some proofs
### *1. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
To prove that, let's first derive the sigmoid function:
$\sigma(x) = \frac{1}{1+e^{-x}}$
By the chain rule, $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$
which factors as $\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$
Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$
and $\frac{e^{-x}}{1+e^{-x}} = \frac{(1+e^{-x})-1}{1+e^{-x}} = 1 - \frac{1}{1+e^{-x}} = 1 - \sigma(x)$
So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
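A quick numerical sanity check of this identity (a standalone sketch, not part of the repository code):

``` python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = np.linspace(-5, 5, 101)
h = 1e-5  # step for a centred finite-difference approximation of the derivative
numerical = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
analytical = sigmoid(x) * (1 - sigmoid(x))
print(np.max(np.abs(numerical - analytical)))  # should be close to zero
```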
mlp.py 0 → 100644
import numpy as np
def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, target: np.ndarray, learning_rate: float):
"""
Performs one learning step of the MLP with MSE loss
Arguments:
w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
w2 -- weights of the output layer
    b2 -- biases of the output layer
data -- input data
target -- target values
learning_rate -- learning rate
Returns:
w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
loss -- loss of the forward pass
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (sigmoid activation function)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - target))
# Backward pass
    # derivative of the loss with respect to the output of the output layer
    # (the MSE loss averages over every entry of the prediction matrix)
    dC_dA2 = 2 / predictions.size * (predictions - target)
    # derivative of the loss with respect to the input of the output layer
    dC_dZ2 = dC_dA2 * (1 - predictions) * predictions
    # derivative of the loss with respect to the weights of the output layer
    dC_dW2 = np.matmul(a1.T, dC_dZ2)
    # derivative of the loss with respect to the biases of the output layer
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
    # derivative of the loss with respect to the output of the hidden layer
    dC_dA1 = np.matmul(dC_dZ2, w2.T)
    # derivative of the loss with respect to the input of the hidden layer
    dC_dZ1 = dC_dA1 * (1 - a1) * a1
    # derivative of the loss with respect to the weights of the hidden layer
    dC_dW1 = np.matmul(a0.T, dC_dZ1)
    # derivative of the loss with respect to the biases of the hidden layer
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
    # Update weights and biases
w1 -= learning_rate * dC_dW1
b1 -= learning_rate * dC_dB1
w2 -= learning_rate * dC_dW2
b2 -= learning_rate * dC_dB2
return w1, b1, w2, b2, loss
def one_hot(labels: np.array):
"""
Converts a vector of labels into a one-hot matrix
Arguments:
labels -- vector of labels
Returns:
one_hot_matrix -- one-hot matrix
"""
one_hot_matrix = np.zeros((len(labels), np.max(labels) + 1))
one_hot_matrix[np.arange(len(labels)), labels] = 1
return one_hot_matrix
def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
"""
Performs one learning step of the MLP with cross-entropy loss
Arguments:
w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
w2 -- weights of the output layer
    b2 -- biases of the output layer
data -- input data
    labels_train -- one-hot encoded labels of the training data
learning_rate -- learning rate
Returns:
w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
loss -- loss of the forward pass
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (sigmoid activation function)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (cross-entropy)
loss = -np.mean(np.sum(labels_train * np.log(predictions) +
(1 - labels_train) * np.log(1 - predictions), axis=1))
    # Backward pass
    # With sigmoid outputs and the cross-entropy loss above, the gradient of the
    # sample-averaged loss with respect to the output pre-activation simplifies to (a2 - y) / N
    dC_dZ2 = (a2 - labels_train) / data.shape[0]
    dC_dW2 = np.matmul(a1.T, dC_dZ2)  # gradient w.r.t. the output layer weights
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)  # gradient w.r.t. the output layer biases
    dC_dA1 = np.matmul(dC_dZ2, w2.T)  # gradient w.r.t. the hidden layer activations
    dC_dZ1 = dC_dA1 * (1 - a1) * a1  # gradient w.r.t. the hidden layer pre-activation
    dC_dW1 = np.matmul(a0.T, dC_dZ1)  # gradient w.r.t. the hidden layer weights
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)  # gradient w.r.t. the hidden layer biases
    # Update weights and biases
    w1 -= learning_rate * dC_dW1
    b1 -= learning_rate * dC_dB1
    w2 -= learning_rate * dC_dW2
    b2 -= learning_rate * dC_dB2
return w1, b1, w2, b2, loss
if __name__ == "__main__":
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
    # Random initialization of the network weights and biases
w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.rand(N, d_out)  # create random targets
for i in range(100):
w1, b1, w2, b2, loss = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
print(loss)
print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
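    # Hypothetical extra demo (not in the original file): one cross-entropy update,
    # assuming learn_once_cross_entropy expects one-hot encoded targets as implemented above.
    labels = np.random.randint(0, d_out, N)  # random integer class labels
    w1, b1, w2, b2, ce_loss = learn_once_cross_entropy(
        w1, b1, w2, b2, data, one_hot(labels), 0.1)
    print(ce_loss)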
@@ -75,41 +75,6 @@ def split_dataset(data: np.array, labels: np.array, split: float):
return data_train, labels_train, data_test, labels_test
def test_split_dataset():
cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
split = 0.8
datapoint_size = cifar_data.shape[1]
data_train, labels_train, data_test, labels_test = split_dataset(
cifar_data, cifar_labels, split
)
expected_train_size = round(split * cifar_data.shape[0])
expected_test_size = cifar_data.shape[0] - expected_train_size
# check dimensions
assert data_train.shape == (expected_train_size, datapoint_size)
assert labels_train.shape == (expected_train_size,)
assert data_test.shape == (expected_test_size, datapoint_size)
assert labels_test.shape == (expected_test_size,)
# check types
assert data_train.dtype == cifar_data.dtype
assert labels_train.dtype == cifar_labels.dtype
assert data_test.dtype == cifar_data.dtype
assert labels_test.dtype == cifar_labels.dtype
    # check that a randomly picked training example and its label still match in the original dataset
# pick random index in the training set
i = np.random.randint(0, expected_train_size)
random_data = data_train[i]
random_label = labels_train[i]
# check that the data and label match in original dataset
original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
assert cifar_labels[original_index] == random_label
if __name__ == "__main__":
images, labels = read_cifar("data/cifar-10-batches-py/")
print(images.shape)
@@ -123,5 +88,4 @@ if __name__ == "__main__":
print(labels_train[0])
print(data_test[0])
print(labels_test[0])
test_split_dataset()
import numpy as np
import sys
import pytest
from knn import *
def test_distance_matrix():
X = np.array([[1, 0], [0, 0]])
Y = np.array([[1, 1], [0, 1]])
result = distance_matrix(X, Y)
expected = np.array([[1, np.sqrt(2)], [np.sqrt(2), 1]])
np.testing.assert_equal(result, expected)
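# --- Illustrative sketch (not part of the repository) ---
# A distance_matrix implementation consistent with the test above: pairwise Euclidean
# distances computed via the expansion ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y.
def distance_matrix_sketch(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    x2 = np.sum(x ** 2, axis=1, keepdims=True)  # shape (n, 1)
    y2 = np.sum(y ** 2, axis=1, keepdims=True).T  # shape (1, m)
    xy = np.matmul(x, y.T)  # shape (n, m)
    # clip tiny negative values caused by floating-point rounding before the sqrt
    return np.sqrt(np.maximum(x2 + y2 - 2 * xy, 0))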
def test_knn_predict():
dist = np.array([[1, 4], [3, 2]])
labels_train = np.array([0, 1])
k = 1
result = knn_predict(dist, labels_train, k)
expected = np.array([0, 1])
np.testing.assert_array_equal(result, expected)
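# --- Illustrative sketch (not part of the repository) ---
# A knn_predict implementation consistent with the test above: rows of dists are test
# points, columns are training points; predict by majority vote among the k nearest.
def knn_predict_sketch(dists: np.ndarray, labels_train: np.ndarray, k: int) -> np.ndarray:
    nearest = np.argsort(dists, axis=1)[:, :k]  # indices of the k nearest training points
    nearest_labels = labels_train[nearest]  # shape (n_test, k)
    # majority vote over the k nearest labels for each test point
    return np.array([np.bincount(row).argmax() for row in nearest_labels])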
def test_evaluate_knn():
data_train = np.array([[1, 2], [3, 4]])
labels_train = np.array([0, 1])
data_test = np.array([[5, 6], [7, 8]])
labels_test = np.array([1, 0])
k = 1
result = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
assert result == 0.5
import numpy as np
import sys
import pytest
from mlp import *
def test_learn_once_mse():
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
    # Initialization of the network weights and biases
w1 = np.zeros((d_in, d_h)) # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
w2 = np.zeros((d_h, d_out)) # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.rand(N, d_out)  # create random targets
w1, b1, w2, b2, loss = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
w1, b1, w2, b2, loss2 = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
assert loss2 < loss
def test_one_hot():
indices = np.array([2, 0, 1])
result = one_hot(indices)
expected = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
np.testing.assert_array_equal(result, expected)
import numpy as np
import sys
import pytest
from read_cifar import *
def test_read_cifar_batch():
data, labels = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")
assert data.shape == (10000, 3072)
assert labels.shape == (10000,)
assert data.dtype == np.float32
assert labels.dtype == np.int64
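# --- Illustrative sketch (not part of the repository) ---
# A read_cifar_batch implementation consistent with the shapes and dtypes checked above,
# assuming the standard CIFAR-10 "python version" pickle format; the actual read_cifar.py may differ.
def read_cifar_batch_sketch(path: str):
    import pickle
    with open(path, "rb") as f:
        batch = pickle.load(f, encoding="bytes")
    data = np.array(batch[b"data"], dtype=np.float32)  # (10000, 3072) flattened images
    labels = np.array(batch[b"labels"], dtype=np.int64)  # (10000,) class indices
    return data, labels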
def test_read_cifar():
data, labels = read_cifar("data/cifar-10-batches-py/")
assert data.shape == (60000, 3072)
assert labels.shape == (60000,)
assert data.dtype == np.float32
assert labels.dtype == np.int64
def test_split_dataset():
cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
split = 0.8
datapoint_size = cifar_data.shape[1]
data_train, labels_train, data_test, labels_test = split_dataset(
cifar_data, cifar_labels, split
)
expected_train_size = round(split * cifar_data.shape[0])
expected_test_size = cifar_data.shape[0] - expected_train_size
# check dimensions
assert data_train.shape == (expected_train_size, datapoint_size)
assert labels_train.shape == (expected_train_size,)
assert data_test.shape == (expected_test_size, datapoint_size)
assert labels_test.shape == (expected_test_size,)
# check types
assert data_train.dtype == cifar_data.dtype
assert labels_train.dtype == cifar_labels.dtype
assert data_test.dtype == cifar_data.dtype
assert labels_test.dtype == cifar_labels.dtype
    # check that a randomly picked training example and its label still match in the original dataset
# pick random index in the training set
i = np.random.randint(0, expected_train_size)
random_data = data_train[i]
random_label = labels_train[i]
# check that the data and label match in original dataset
original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
assert cifar_labels[original_index] == random_label