From a95e44f98149bbfd2e6760d6c43efe4165bdc5f0 Mon Sep 17 00:00:00 2001
From: "Jangberry (Nomad-Debian)" <matt2001@hotmail.fr>
Date: Fri, 10 Nov 2023 17:42:41 +0100
Subject: [PATCH] Up to Q13
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .vscode/settings.json           |   6 +-
 Artificial Neural Network.ipynb |  40 ---------
 LICENSE                         |   4 +-
 LICENSE-tutorial.md             |  21 +++++
 README.md                       |  20 ++---
 mlp.py                          | 147 ++++++++++++++++++++++++++++++++
 read_cifar.py                   |  36 --------
 tests/__init__.py               |   0
 tests/test_knn.py               |  31 +++++++
 tests/test_mlp.py               |  35 ++++++++
 tests/test_read_cifar.py        |  54 ++++++++++++
 11 files changed, 305 insertions(+), 89 deletions(-)
 delete mode 100644 Artificial Neural Network.ipynb
 create mode 100644 LICENSE-tutorial.md
 create mode 100644 mlp.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_knn.py
 create mode 100644 tests/test_mlp.py
 create mode 100644 tests/test_read_cifar.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 56c56a0..5e9d33d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,8 @@
 {
     "python.analysis.autoImportCompletions": true,
-    "python.analysis.typeCheckingMode": "off"
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
 }
\ No newline at end of file
diff --git a/Artificial Neural Network.ipynb b/Artificial Neural Network.ipynb
deleted file mode 100644
index 1f366ba..0000000
--- a/Artificial Neural Network.ipynb
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import read_cifar\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/LICENSE b/LICENSE
index 0c040bc..ccec6e3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2022 Quentin GALLOUÉDEC
+Copyright (c) 2023 Matthieu Massardier
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.
diff --git a/LICENSE-tutorial.md b/LICENSE-tutorial.md
new file mode 100644
index 0000000..0c040bc
--- /dev/null
+++ b/LICENSE-tutorial.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Quentin GALLOUÉDEC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index a0ac53b..1f8b841 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 
 ## Setup
 
-1. Download the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
-1. (*optionnal*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
-1. Install the requirements using `pip install -r requirements.txt`.
+1. Download and extract the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
+2. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
+3. Install the requirements using `pip install -r requirements.txt`.
 
 ## Usage
 
@@ -13,12 +13,12 @@ Otherwise here is a test result:
 

 
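+## Tests
+
+This patch adds unit tests under [tests](./tests/). Assuming `pytest` is
+installed in your environment, you can run them from the repository root with
+`python -m pytest tests`.
+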
 ## Some proofs
 
-### *1. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
+### *1\. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
 To prove that, let's first derive the sigmoid function:
-$\sigma(x) = \frac{1}{1+e^{-x}}$
-so $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$
-$\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$
-Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$
-And $\frac{e^{-x}}{1+e^{-x}} = 1 - \frac{1}{1+e^{x}}$
-So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
\ No newline at end of file
+$\\sigma(x) = \\frac{1}{1+e^{-x}}$
+so $\\sigma'(x)=\\frac{e^{-x}}{(1+e^{-x})^2}$
+$\\sigma'(x)=\\frac{1}{1+e^{-x}}\\times\\frac{e^{-x}}{1+e^{-x}}$
+Here we can identify $\\frac{1}{1+e^{-x}} = \\sigma(x)$
+And $\\frac{e^{-x}}{1+e^{-x}} = 1 - \\frac{1}{1+e^{-x}} = 1 - \\sigma(x)$
+So $\\sigma'(x) = \\sigma(x) \\times (1 - \\sigma(x))$
diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000..0f3546b
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,147 @@
+import numpy as np
+
+
+def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, target: np.ndarray, learning_rate: float):
+    """
+    Performs one learning step of the MLP with MSE loss
+
+    Arguments:
+    w1 -- weights of the hidden layer
+    b1 -- biases of the hidden layer
+    w2 -- weights of the output layer
+    b2 -- biases of the output layer
+    data -- input data
+    target -- target values
+    learning_rate -- learning rate
+    Returns:
+    w1 -- updated weights of the hidden layer
+    b1 -- updated biases of the hidden layer
+    w2 -- updated weights of the output layer
+    b2 -- updated biases of the output layer
+    loss -- loss of the forward pass
+    """
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    # output of the hidden layer (sigmoid activation function)
+    a1 = 1 / (1 + np.exp(-z1))
+
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # output of the output layer (sigmoid activation function)
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - target))
+
+    # Backward pass
+    # derivative of the loss with respect to the output of the output layer
+    # (the mean runs over all N * d_out entries of target, hence target.size)
+    dC_dA2 = 2 / target.size * (predictions - target)
+    # derivative of the loss with respect to the input of the output layer
+    dC_dZ2 = dC_dA2 * (1 - predictions) * predictions
+    # derivative of the loss with respect to the weights of the output layer
+    dC_dW2 = np.matmul(a1.T, dC_dZ2)
+    # derivative of the loss with respect to the biases of the output layer
+    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
+
+    # derivative of the loss with respect to the output of the hidden layer
+    dC_dA1 = np.matmul(dC_dZ2, w2.T)
+    # derivative of the loss with respect to the input of the hidden layer
+    dC_dZ1 = dC_dA1 * (1 - a1) * a1
+    # derivative of the loss with respect to the weights of the hidden layer
+    dC_dW1 = np.matmul(a0.T, dC_dZ1)
+    # derivative of the loss with respect to the biases of the hidden layer
+    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dW1
+    b1 -= learning_rate * dC_dB1
+    w2 -= learning_rate * dC_dW2
+    b2 -= learning_rate * dC_dB2
+
+    return w1, b1, w2, b2, loss
+
+
+def one_hot(labels: np.ndarray):
+    """
+    Converts a vector of labels into a one-hot matrix
+
+    Arguments:
+    labels -- vector of labels
+    Returns:
+    one_hot_matrix -- one-hot matrix
+    """
+    one_hot_matrix = np.zeros((len(labels), np.max(labels) + 1))
+    one_hot_matrix[np.arange(len(labels)), labels] = 1
+    return one_hot_matrix
+
+
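+# Optional sanity check, illustrative only (`check_sigmoid_derivative` is an
+# addition, not required by the assignment): numerically verify the identity
+# sigma'(x) = sigma(x) * (1 - sigma(x)) proved in the README, using a central
+# finite difference.
+def check_sigmoid_derivative(x: float = 0.5, eps: float = 1e-6) -> float:
+    def sigmoid(t):
+        return 1 / (1 + np.exp(-t))
+    numerical = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)
+    analytical = sigmoid(x) * (1 - sigmoid(x))
+    return abs(numerical - analytical)  # should be close to 0
+
+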
+def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
+    """
+    Performs one learning step of the MLP with cross-entropy loss
+
+    Arguments:
+    w1 -- weights of the hidden layer
+    b1 -- biases of the hidden layer
+    w2 -- weights of the output layer
+    b2 -- biases of the output layer
+    data -- input data
+    labels_train -- one-hot labels of the training data
+    learning_rate -- learning rate
+    Returns:
+    w1 -- updated weights of the hidden layer
+    b1 -- updated biases of the hidden layer
+    w2 -- updated weights of the output layer
+    b2 -- updated biases of the output layer
+    loss -- loss of the forward pass
+    """
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    # output of the hidden layer (sigmoid activation function)
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    # output of the output layer (sigmoid activation function)
+    a2 = 1 / (1 + np.exp(-z2))
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (cross-entropy)
+    loss = -np.mean(np.sum(labels_train * np.log(predictions) +
+                           (1 - labels_train) * np.log(1 - predictions), axis=1))
+
+    # Backward pass
+    # derivative of the loss with respect to the output of the output layer
+    # (divided by the batch size N because the loss is a mean over samples)
+    dC_dA2 = (-labels_train / predictions +
+              (1 - labels_train) / (1 - predictions)) / data.shape[0]
+    # derivative of the loss with respect to the input of the output layer
+    # (together with dC_dA2 this simplifies to (a2 - labels_train) / N)
+    dC_dZ2 = dC_dA2 * a2 * (1 - a2)
+    # derivative of the loss with respect to the weights of the output layer
+    dC_dW2 = np.matmul(a1.T, dC_dZ2)
+    # derivative of the loss with respect to the biases of the output layer
+    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
+    # derivative of the loss with respect to the output of the hidden layer
+    dC_dA1 = np.matmul(dC_dZ2, w2.T)
+    # derivative of the loss with respect to the input of the hidden layer
+    dC_dZ1 = dC_dA1 * (1 - a1) * a1
+    # derivative of the loss with respect to the weights of the hidden layer
+    dC_dW1 = np.matmul(a0.T, dC_dZ1)
+    # derivative of the loss with respect to the biases of the hidden layer
+    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dW1
+    b1 -= learning_rate * dC_dB1
+    w2 -= learning_rate * dC_dW2
+    b2 -= learning_rate * dC_dB2
+
+    return w1, b1, w2, b2, loss
+
+
+if __name__ == "__main__":
+    N = 30  # number of input data
+    d_in = 3  # input dimension
+    d_h = 3  # number of neurons in the hidden layer
+    d_out = 2  # output dimension (number of neurons of the output layer)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    data = np.random.rand(N, d_in)  # create random input data
+    targets = np.random.rand(N, d_out)  # create random targets
+
+    for i in range(100):
+        w1, b1, w2, b2, loss = learn_once_mse(
+            w1, b1, w2, b2, data, targets, 0.1)
+        print(loss)
+
+    print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
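+
+    # Illustrative demo of the cross-entropy step (an addition, not part of
+    # the assignment): train on one-hot targets; `np.arange(N) % d_out`
+    # simply guarantees that every one of the d_out classes appears.
+    labels = one_hot(np.arange(N) % d_out)
+    for i in range(100):
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(
+            w1, b1, w2, b2, data, labels, 0.1)
+        print(loss)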
diff --git a/read_cifar.py b/read_cifar.py
index 29fa7c1..86d78f2 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -75,41 +75,6 @@ def split_dataset(data: np.array, labels: np.array, split: float):
     return data_train, labels_train, data_test, labels_test
 
 
-def test_split_dataset():
-    cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
-    split = 0.8
-
-    datapoint_size = cifar_data.shape[1]
-
-    data_train, labels_train, data_test, labels_test = split_dataset(
-        cifar_data, cifar_labels, split
-    )
-
-    expected_train_size = round(split * cifar_data.shape[0])
-    expected_test_size = cifar_data.shape[0] - expected_train_size
-
-    # check dimensions
-    assert data_train.shape == (expected_train_size, datapoint_size)
-    assert labels_train.shape == (expected_train_size,)
-    assert data_test.shape == (expected_test_size, datapoint_size)
-    assert labels_test.shape == (expected_test_size,)
-
-    # check types
-    assert data_train.dtype == cifar_data.dtype
-    assert labels_train.dtype == cifar_labels.dtype
-    assert data_test.dtype == cifar_data.dtype
-    assert labels_test.dtype == cifar_labels.dtype
-
-    # check that the first data and label are still in the dataset and match indices
-    # pick random index in the training set
-    i = np.random.randint(0, expected_train_size)
-    random_data = data_train[i]
-    random_label = labels_train[i]
-    # check that the data and label match in original dataset
-    original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
-    assert cifar_labels[original_index] == random_label
-
-
 if __name__ == "__main__":
     images, labels = read_cifar("data/cifar-10-batches-py/")
     print(images.shape)
@@ -123,5 +88,4 @@ if __name__ == "__main__":
     print(labels_train[0])
     print(data_test[0])
    print(labels_test[0])
-    test_split_dataset()
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_knn.py b/tests/test_knn.py
new file mode 100644
index 0000000..9f4b8c8
--- /dev/null
+++ b/tests/test_knn.py
@@ -0,0 +1,31 @@
+import numpy as np
+import sys
+import pytest
+from knn import *
+
+def test_distance_matrix():
+    X = np.array([[1, 0], [0, 0]])
+    Y = np.array([[1, 1], [0, 1]])
+    result = distance_matrix(X, Y)
+    expected = np.array([[1, np.sqrt(2)], [np.sqrt(2), 1]])
+
+    np.testing.assert_equal(result, expected)
+
+def test_knn_predict():
+    dist = np.array([[1, 4], [3, 2]])
+    labels_train = np.array([0, 1])
+    k = 1
+    result = knn_predict(dist, labels_train, k)
+    expected = np.array([0, 1])
+
+    np.testing.assert_array_equal(result, expected)
+
+def test_evaluate_knn():
+    data_train = np.array([[1, 2], [3, 4]])
+    labels_train = np.array([0, 1])
+    data_test = np.array([[5, 6], [7, 8]])
+    labels_test = np.array([1, 0])
+    k = 1
+    result = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+
+    assert result == 0.5
\ No newline at end of file
diff --git a/tests/test_mlp.py b/tests/test_mlp.py
new file mode 100644
index 0000000..662faf0
--- /dev/null
+++ b/tests/test_mlp.py
@@ -0,0 +1,35 @@
+import numpy as np
+import sys
+import pytest
+from mlp import *
+
+
+def test_learn_once_mse():
+    N = 30  # number of input data
+    d_in = 3  # input dimension
+    d_h = 3  # number of neurons in the hidden layer
+    d_out = 2  # output dimension (number of neurons of the output layer)
+
+    # Initialization of the network weights and biases
+    w1 = np.zeros((d_in, d_h))  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = np.zeros((d_h, d_out))  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    data = np.random.rand(N, d_in)  # create random input data
+    targets = np.random.rand(N, d_out)  # create random targets
+
+    w1, b1, w2, b2, loss = learn_once_mse(
+        w1, b1, w2, b2, data, targets, 0.1)
+
+    w1, b1, w2, b2, loss2 = learn_once_mse(
+        w1, b1, w2, b2, data, targets, 0.1)
+
+    assert loss2 < loss
+
+def test_one_hot():
+    indices = np.array([2, 0, 1])
+    result = one_hot(indices)
+    expected = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
+
+    np.testing.assert_array_equal(result, expected)
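+
+def test_learn_once_cross_entropy():
+    # Sketch of a companion test (it assumes learn_once_cross_entropy is
+    # completed as in mlp.py): a second learning step on the same batch
+    # should lower the cross-entropy loss.
+    np.random.seed(0)  # fixed seed to keep the test deterministic
+    N, d_in, d_h, d_out = 30, 3, 3, 2
+
+    w1 = 2 * np.random.rand(d_in, d_h) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * np.random.rand(d_h, d_out) - 1
+    b2 = np.zeros((1, d_out))
+
+    data = np.random.rand(N, d_in)
+    labels = one_hot(np.arange(N) % d_out)  # one-hot targets, all classes present
+
+    w1, b1, w2, b2, loss = learn_once_cross_entropy(
+        w1, b1, w2, b2, data, labels, 0.1)
+    w1, b1, w2, b2, loss2 = learn_once_cross_entropy(
+        w1, b1, w2, b2, data, labels, 0.1)
+
+    assert loss2 < loss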
diff --git a/tests/test_read_cifar.py b/tests/test_read_cifar.py
new file mode 100644
index 0000000..5cf13a5
--- /dev/null
+++ b/tests/test_read_cifar.py
@@ -0,0 +1,54 @@
+import numpy as np
+import sys
+import pytest
+from read_cifar import *
+
+def test_read_cifar_batch():
+    data, labels = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")
+
+    assert data.shape == (10000, 3072)
+    assert labels.shape == (10000,)
+    assert data.dtype == np.float32
+    assert labels.dtype == np.int64
+
+def test_read_cifar():
+    data, labels = read_cifar("data/cifar-10-batches-py/")
+
+    assert data.shape == (60000, 3072)
+    assert labels.shape == (60000,)
+    assert data.dtype == np.float32
+    assert labels.dtype == np.int64
+
+def test_split_dataset():
+    cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
+    split = 0.8
+
+    datapoint_size = cifar_data.shape[1]
+
+    data_train, labels_train, data_test, labels_test = split_dataset(
+        cifar_data, cifar_labels, split
+    )
+
+    expected_train_size = round(split * cifar_data.shape[0])
+    expected_test_size = cifar_data.shape[0] - expected_train_size
+
+    # check dimensions
+    assert data_train.shape == (expected_train_size, datapoint_size)
+    assert labels_train.shape == (expected_train_size,)
+    assert data_test.shape == (expected_test_size, datapoint_size)
+    assert labels_test.shape == (expected_test_size,)
+
+    # check types
+    assert data_train.dtype == cifar_data.dtype
+    assert labels_train.dtype == cifar_labels.dtype
+    assert data_test.dtype == cifar_data.dtype
+    assert labels_test.dtype == cifar_labels.dtype
+
+    # check that a randomly picked training sample and its label still match the original dataset
+    # pick a random index in the training set
+    i = np.random.randint(0, expected_train_size)
+    random_data = data_train[i]
+    random_label = labels_train[i]
+    # find the matching row in the original data and compare labels
+    original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
+    assert cifar_labels[original_index] == random_label
-- 
GitLab