Unverified commit a95e44f9 authored by Jangberry (Nomad-Debian)

Up to Q13

parent 6e2e532b
{
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "off"
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
%% Cell type:code id: tags:
``` python
import read_cifar
import numpy as np
```
%% Cell type:markdown id: tags:
MIT License
Copyright (c) 2022 Quentin GALLOUÉDEC
Copyright (c) 2023 Matthieu Massardier
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -2,9 +2,9 @@
## Setup
1. Download and extract the [CIFAR dataset](https://www.cs.toronto.edu/~kriz/cifar.html) `cifar-10-batches-py` in the [data](./data/) folder.
2. (*optional*) You might want to create a venv with `python -m venv .venv` and activate it with `source .venv/bin/activate`.
3. Install the requirements using `pip install -r requirements.txt`.
## Usage
@@ -13,12 +13,12 @@ Otherwise here is a test result: ![knn test result](./results/knn.png)
## Some proofs
### *1. Prove that $`\sigma' = \sigma \times (1-\sigma)`$*
To prove that, let's first derive the sigmoid function:
$\sigma(x) = \frac{1}{1+e^{-x}}$
By the chain rule, $\sigma'(x)=\frac{e^{-x}}{(1+e^{-x})^2}$
which factors as $\sigma'(x)=\frac{1}{1+e^{-x}}\times\frac{e^{-x}}{1+e^{-x}}$
Here we can identify $\frac{1}{1+e^{-x}} = \sigma(x)$
and $\frac{e^{-x}}{1+e^{-x}} = \frac{(1+e^{-x})-1}{1+e^{-x}} = 1 - \frac{1}{1+e^{-x}} = 1 - \sigma(x)$
So $\sigma'(x) = \sigma(x) \times (1 - \sigma(x))$
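A quick numerical sanity check of this identity (a standalone sketch, not part of the repository code):

``` python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

x = np.linspace(-5, 5, 101)
h = 1e-5  # step for a centred finite-difference approximation of the derivative
numerical = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
analytical = sigmoid(x) * (1 - sigmoid(x))
print(np.max(np.abs(numerical - analytical)))  # should be close to zero
```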
mlp.py 0 → 100644
import numpy as np
def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, target: np.ndarray, learning_rate: float):
"""
Performs one learning step of the MLP with MSE loss
Arguments:
w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
w2 -- weights of the output layer
    b2 -- biases of the output layer
data -- input data
target -- target values
learning_rate -- learning rate
Returns:
w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
loss -- loss of the forward pass
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (sigmoid activation function)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - target))
# Backward pass
    # derivative of the loss with respect to the output of the output layer
    # (the MSE loss averages over every entry of the prediction matrix)
    dC_dA2 = 2 / predictions.size * (predictions - target)
    # derivative of the loss with respect to the input of the output layer
    dC_dZ2 = dC_dA2 * (1 - predictions) * predictions
    # derivative of the loss with respect to the weights of the output layer
    dC_dW2 = np.matmul(a1.T, dC_dZ2)
    # derivative of the loss with respect to the biases of the output layer
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
    # derivative of the loss with respect to the output of the hidden layer
    dC_dA1 = np.matmul(dC_dZ2, w2.T)
    # derivative of the loss with respect to the input of the hidden layer
    dC_dZ1 = dC_dA1 * (1 - a1) * a1
    # derivative of the loss with respect to the weights of the hidden layer
    dC_dW1 = np.matmul(a0.T, dC_dZ1)
    # derivative of the loss with respect to the biases of the hidden layer
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
    # Update weights and biases
w1 -= learning_rate * dC_dW1
b1 -= learning_rate * dC_dB1
w2 -= learning_rate * dC_dW2
b2 -= learning_rate * dC_dB2
return w1, b1, w2, b2, loss
def one_hot(labels: np.array):
"""
Converts a vector of labels into a one-hot matrix
Arguments:
labels -- vector of labels
Returns:
one_hot_matrix -- one-hot matrix
"""
one_hot_matrix = np.zeros((len(labels), np.max(labels) + 1))
one_hot_matrix[np.arange(len(labels)), labels] = 1
return one_hot_matrix
def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
"""
Performs one learning step of the MLP with cross-entropy loss
Arguments:
w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
w2 -- weights of the output layer
    b2 -- biases of the output layer
data -- input data
    labels_train -- one-hot encoded labels of the training data
learning_rate -- learning rate
Returns:
w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
loss -- loss of the forward pass
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
# output of the hidden layer (sigmoid activation function)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2 # input of the output layer
# output of the output layer (sigmoid activation function)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (cross-entropy)
loss = -np.mean(np.sum(labels_train * np.log(predictions) +
(1 - labels_train) * np.log(1 - predictions), axis=1))
    # Backward pass
    # With sigmoid outputs and the cross-entropy loss above, the gradient of the
    # sample-averaged loss with respect to the output pre-activation simplifies to (a2 - y) / N
    dC_dZ2 = (a2 - labels_train) / data.shape[0]
    dC_dW2 = np.matmul(a1.T, dC_dZ2)  # gradient w.r.t. the output layer weights
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)  # gradient w.r.t. the output layer biases
    dC_dA1 = np.matmul(dC_dZ2, w2.T)  # gradient w.r.t. the hidden layer activations
    dC_dZ1 = dC_dA1 * (1 - a1) * a1  # gradient w.r.t. the hidden layer pre-activation
    dC_dW1 = np.matmul(a0.T, dC_dZ1)  # gradient w.r.t. the hidden layer weights
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)  # gradient w.r.t. the hidden layer biases
    # Update weights and biases
    w1 -= learning_rate * dC_dW1
    b1 -= learning_rate * dC_dB1
    w2 -= learning_rate * dC_dW2
    b2 -= learning_rate * dC_dB2
return w1, b1, w2, b2, loss
if __name__ == "__main__":
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
    # Random initialization of the network weights and biases
w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.rand(N, d_out)  # create random targets
for i in range(100):
w1, b1, w2, b2, loss = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
print(loss)
print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
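    # Hypothetical extra demo (not in the original file): one cross-entropy update,
    # assuming learn_once_cross_entropy expects one-hot encoded targets as implemented above.
    labels = np.random.randint(0, d_out, N)  # random integer class labels
    w1, b1, w2, b2, ce_loss = learn_once_cross_entropy(
        w1, b1, w2, b2, data, one_hot(labels), 0.1)
    print(ce_loss)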
@@ -75,41 +75,6 @@ def split_dataset(data: np.array, labels: np.array, split: float):
return data_train, labels_train, data_test, labels_test
def test_split_dataset():
cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
split = 0.8
datapoint_size = cifar_data.shape[1]
data_train, labels_train, data_test, labels_test = split_dataset(
cifar_data, cifar_labels, split
)
expected_train_size = round(split * cifar_data.shape[0])
expected_test_size = cifar_data.shape[0] - expected_train_size
# check dimensions
assert data_train.shape == (expected_train_size, datapoint_size)
assert labels_train.shape == (expected_train_size,)
assert data_test.shape == (expected_test_size, datapoint_size)
assert labels_test.shape == (expected_test_size,)
# check types
assert data_train.dtype == cifar_data.dtype
assert labels_train.dtype == cifar_labels.dtype
assert data_test.dtype == cifar_data.dtype
assert labels_test.dtype == cifar_labels.dtype
    # check that a randomly picked training example and its label still match in the original dataset
# pick random index in the training set
i = np.random.randint(0, expected_train_size)
random_data = data_train[i]
random_label = labels_train[i]
# check that the data and label match in original dataset
original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
assert cifar_labels[original_index] == random_label
if __name__ == "__main__":
images, labels = read_cifar("data/cifar-10-batches-py/")
print(images.shape)
@@ -123,5 +88,4 @@ if __name__ == "__main__":
print(labels_train[0])
print(data_test[0])
print(labels_test[0])
test_split_dataset()
import numpy as np
import sys
import pytest
from knn import *
def test_distance_matrix():
X = np.array([[1, 0], [0, 0]])
Y = np.array([[1, 1], [0, 1]])
result = distance_matrix(X, Y)
expected = np.array([[1, np.sqrt(2)], [np.sqrt(2), 1]])
np.testing.assert_equal(result, expected)
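# --- Illustrative sketch (not part of the repository) ---
# A distance_matrix implementation consistent with the test above: pairwise Euclidean
# distances computed via the expansion ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y.
def distance_matrix_sketch(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    x2 = np.sum(x ** 2, axis=1, keepdims=True)  # shape (n, 1)
    y2 = np.sum(y ** 2, axis=1, keepdims=True).T  # shape (1, m)
    xy = np.matmul(x, y.T)  # shape (n, m)
    # clip tiny negative values caused by floating-point rounding before the sqrt
    return np.sqrt(np.maximum(x2 + y2 - 2 * xy, 0))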
def test_knn_predict():
dist = np.array([[1, 4], [3, 2]])
labels_train = np.array([0, 1])
k = 1
result = knn_predict(dist, labels_train, k)
expected = np.array([0, 1])
np.testing.assert_array_equal(result, expected)
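# --- Illustrative sketch (not part of the repository) ---
# A knn_predict implementation consistent with the test above: rows of dists are test
# points, columns are training points; predict by majority vote among the k nearest.
def knn_predict_sketch(dists: np.ndarray, labels_train: np.ndarray, k: int) -> np.ndarray:
    nearest = np.argsort(dists, axis=1)[:, :k]  # indices of the k nearest training points
    nearest_labels = labels_train[nearest]  # shape (n_test, k)
    # majority vote over the k nearest labels for each test point
    return np.array([np.bincount(row).argmax() for row in nearest_labels])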
def test_evaluate_knn():
data_train = np.array([[1, 2], [3, 4]])
labels_train = np.array([0, 1])
data_test = np.array([[5, 6], [7, 8]])
labels_test = np.array([1, 0])
k = 1
result = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
assert result == 0.5
import numpy as np
import sys
import pytest
from mlp import *
def test_learn_once_mse():
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
    # Initialization of the network weights and biases
w1 = np.zeros((d_in, d_h)) # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
w2 = np.zeros((d_h, d_out)) # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.rand(N, d_out)  # create random targets
w1, b1, w2, b2, loss = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
w1, b1, w2, b2, loss2 = learn_once_mse(
w1, b1, w2, b2, data, targets, 0.1)
assert loss2 < loss
def test_one_hot():
indices = np.array([2, 0, 1])
result = one_hot(indices)
expected = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
np.testing.assert_array_equal(result, expected)
import numpy as np
import sys
import pytest
from read_cifar import *
def test_read_cifar_batch():
data, labels = read_cifar_batch("data/cifar-10-batches-py/data_batch_1")
assert data.shape == (10000, 3072)
assert labels.shape == (10000,)
assert data.dtype == np.float32
assert labels.dtype == np.int64
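# --- Illustrative sketch (not part of the repository) ---
# A read_cifar_batch implementation consistent with the shapes and dtypes checked above,
# assuming the standard CIFAR-10 "python version" pickle format; the actual read_cifar.py may differ.
def read_cifar_batch_sketch(path: str):
    import pickle
    with open(path, "rb") as f:
        batch = pickle.load(f, encoding="bytes")
    data = np.array(batch[b"data"], dtype=np.float32)  # (10000, 3072) flattened images
    labels = np.array(batch[b"labels"], dtype=np.int64)  # (10000,) class indices
    return data, labels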
def test_read_cifar():
data, labels = read_cifar("data/cifar-10-batches-py/")
assert data.shape == (60000, 3072)
assert labels.shape == (60000,)
assert data.dtype == np.float32
assert labels.dtype == np.int64
def test_split_dataset():
cifar_data, cifar_labels = read_cifar("data/cifar-10-batches-py/")
split = 0.8
datapoint_size = cifar_data.shape[1]
data_train, labels_train, data_test, labels_test = split_dataset(
cifar_data, cifar_labels, split
)
expected_train_size = round(split * cifar_data.shape[0])
expected_test_size = cifar_data.shape[0] - expected_train_size
# check dimensions
assert data_train.shape == (expected_train_size, datapoint_size)
assert labels_train.shape == (expected_train_size,)
assert data_test.shape == (expected_test_size, datapoint_size)
assert labels_test.shape == (expected_test_size,)
# check types
assert data_train.dtype == cifar_data.dtype
assert labels_train.dtype == cifar_labels.dtype
assert data_test.dtype == cifar_data.dtype
assert labels_test.dtype == cifar_labels.dtype
    # check that a randomly picked training example and its label still match in the original dataset
# pick random index in the training set
i = np.random.randint(0, expected_train_size)
random_data = data_train[i]
random_label = labels_train[i]
# check that the data and label match in original dataset
original_index = np.where(np.all(cifar_data == random_data, axis=1))[0][0]
assert cifar_labels[original_index] == random_label