Commit 167ea132 authored by pierre-cau

mlp

parent 6b34bc72
Showing with 680 additions and 21 deletions
@@ -22,6 +22,8 @@ pip install -r requirements.txt
The project is divided into the following directories:
- `data/`: Contains the dataset.
- `notebooks/`: Contains all the notebooks of the project.
- `tests/`: Contains all the tests of the code.
- `src/`: Contains the source code of the project.
- `src/utils/`: Contains utility functions such as `read_cifar` or `evaluate_knn`.
- `results/`: Contains the main images and linked files of the project.
@@ -91,7 +93,9 @@ Using all these equations, I have coded some methods in the `mlp.py` file to train
Thus, for `split_factor=0.9`, `d_h=64`, `learning_rate=0.1` and `num_epoch=100`, we obtain the following curves:
![mlp_split_0.1](results/mlp_2.png)
![mlp_split_0.1](results/mlp_1.png)
Here we observe that the accuracy increases steadily, epoch after epoch, although it remains modest. At the end, we reach about 23% accuracy on both the train and test sets. This means that the algorithm is neither underfitted nor overfitted. Both the loss and the train accuracy seem quite stable at the end, which implies that the algorithm has finished its learning.
>Here we observe that the accuracy increases steadily, epoch after epoch, although it remains modest. At the end, we reach about 27% accuracy on both the train and test sets. This means that the algorithm is neither underfitted nor overfitted. Both the loss and the train accuracy seem quite stable at the end, which implies that the algorithm has finished its learning.
Nonetheless, the accuracy is still very low and the algorithm can easily diverge because of exponentially large values, leading to overflows. To counter this phenomenon, I chose to initialize the weights as small as possible while keeping them random. I also introduced some `np.clip` calls and an epsilon term to avoid overflows and division by zero, respectively.
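As a rough sketch of these safeguards (the variable names and shapes below are illustrative assumptions, not the exact code of `mlp.py`), the small random initialization, the clipping and the epsilon could look like this:

```python
import numpy as np

rng = np.random.default_rng(0)
epsilon = 1e-15  # guards np.log against zero probabilities

# Tiny random weights keep the first pre-activations small, away from the
# saturated regions of the sigmoid and from exponential overflow.
w1 = rng.normal(0, 0.01, size=(3072, 64))  # input -> hidden (CIFAR images are 3072-dimensional)
b1 = np.zeros((1, 64))

def safe_sigmoid(x):
    # Clipping the pre-activation bounds np.exp and prevents overflow warnings.
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def safe_cross_entropy(y, y_pred):
    # The epsilon avoids log(0) when a predicted probability collapses to zero.
    return -np.sum(y * np.log(y_pred + epsilon)) / float(y_pred.shape[0])
```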
![lr_comparaison](results/learning_rate_comparaison.png)
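A learning-rate comparison of this kind could be reproduced with a loop such as the one below. This is only a sketch: random placeholder data stands in for CIFAR-10, the import path of `learn_once_cross_entropy` is an assumption, and only its signature as it appears in the `mlp.py` diff further down is relied upon.

```python
import numpy as np
import matplotlib.pyplot as plt
from mlp import learn_once_cross_entropy  # assumed import path

rng = np.random.default_rng(42)
n, d_in, d_h, n_classes, num_epoch = 200, 3072, 64, 10, 100

# Placeholder data: random inputs and one-hot labels instead of CIFAR-10.
data = rng.normal(size=(n, d_in))
labels = np.eye(n_classes)[rng.integers(0, n_classes, size=n)]

for lr in (0.01, 0.1, 0.5):
    # Tiny random initialization, as discussed above.
    w1 = rng.normal(0, 0.01, size=(d_in, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0, 0.01, size=(d_h, n_classes))
    b2 = np.zeros((1, n_classes))
    losses = []
    for _ in range(num_epoch):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, labels, lr)
        losses.append(loss)
    plt.plot(losses, label=f"lr={lr}")

plt.xlabel("epoch")
plt.ylabel("cross-entropy loss")
plt.legend()
plt.show()
```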
Binary image files changed in `results/`:
- `learning_rate__dh__comparaison.png` (101 KiB)
- `learning_rate_comparaison.png` (120 KiB)
- `mlp.png` (59.4 KiB)
- `mlp_1.png` (replaced: 45.1 KiB / 40.1 KiB)
- `mlp_2.png` (replaced: 52.4 KiB / 40 KiB)
- `training.png` (197 KiB)
# Author : Pierre CAU
# Date : 2024
from utils import *
from tqdm import tqdm
import matplotlib.pyplot as plt
@@ -46,7 +49,7 @@ if __name__ == "__main__":
# Parameters
split_factor = 0.9
d_h = 64
learning_rate = 0.2
learning_rate = 0.1
num_epoch = 100
# Split the dataset
......
# Author : Pierre CAU
# Date : 2024
import numpy as np
......
@@ -5,6 +5,16 @@ import numpy as np
from tqdm import tqdm
def sigmoid(x):
"""Calculation of the sigmoid function for a numpy array"""
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
"""Calculation of the sigmoid derivative function for a numpy array """
return sigmoid(x) * (1 - sigmoid(x))
def softmax(x):
"""
Return the softmax function of the x array
@@ -83,10 +93,10 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500)
a1 = 1 / (1 + np.exp(-z1))
a1 = sigmoid(z1)
z2 = np.matmul(a1, w2) + b2
z2 = np.clip(z2, -500, 500)
a2 = 1 / (1 + np.exp(-z2))
a2 = sigmoid(z2)
predictions = a2
# Compute loss (MSE)
@@ -94,13 +104,13 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
# Backward pass
d_loss_a2 = 2 * (predictions - targets) / targets.shape[0]
d_a2_z2 = a2 * (1 - a2)
d_a2_z2 = sigmoid_derivative(z2)
d_loss_z2 = d_loss_a2 * d_a2_z2
d_loss_w2 = np.matmul(a1.T, d_loss_z2)
d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True)
d_loss_a1 = np.matmul(d_loss_z2, w2.T)
d_a1_z1 = a1 * (1 - a1)
d_a1_z1 = sigmoid_derivative(z1)
d_loss_z1 = d_loss_a1 * d_a1_z1
d_loss_w1 = np.matmul(a0.T, d_loss_z1)
@@ -114,6 +124,16 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
return w1, b1, w2, b2, loss
def cross_entropy(y, y_pred):
"""
Function that calculates the cross entropy between predicted
and target values
"""
epsilon = 10 ** (-15)
return -np.sum(y * np.log(y_pred + epsilon)) / float(y_pred.shape[0])
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
"""
Perform one iteration of the training loop for a simple MLP with binary cross-entropy loss.
@@ -157,7 +177,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500) # Avoid overflow
a1 = 1 / (1 + np.exp(-z1))
a1 = sigmoid(z1)
z2 = np.matmul(a1, w2) + b2
z2 = np.clip(z2, -500, 500) # Avoid overflow
a2 = softmax(z2)
@@ -175,6 +195,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
# Compute loss (binary cross-entropy)
# print(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
loss = -np.mean(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
if np.isnan(loss):
# print(labels_train)
# print(predictions)
@@ -183,18 +204,20 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
raise ValueError("Loss is NaN → Try reducing the learning rate or normalizing the data.")
# Backward pass
d_loss_a2 = predictions - labels_train
d_a2_z2 = a2 * (1 - a2)
d_loss_z2 = d_loss_a2 * d_a2_z2
loss = cross_entropy(labels_train,predictions)
d_loss_z2 = (a2 - labels_train) / data.shape[0]
# Layer 2
d_loss_w2 = np.matmul(a1.T,d_loss_z2 )
d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True)
d_loss_b2 = np.sum(d_loss_z2, axis=0)
# Layer 1
d_loss_a1 = np.matmul(d_loss_z2 , w2.T)
d_a1_z1 = a1 * (1 - a1)
d_loss_z1 = d_loss_a1 * d_a1_z1
d_loss_z1 = d_loss_a1 * sigmoid_derivative(z1)
d_loss_w1 = np.matmul(a0.T,d_loss_z1 )
d_loss_b1 = np.sum(d_loss_z1, axis=0, keepdims=True)
d_loss_b1 = np.sum(d_loss_z1, axis=0)
# Update weights and biases
w1 -= learning_rate * d_loss_w1
@@ -202,6 +225,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
w2 -= learning_rate * d_loss_w2
b2 -= learning_rate * d_loss_b2
# print(w1, b1, w2, b2, loss)
return w1, b1, w2, b2, loss
@@ -259,15 +283,22 @@ def train_mlp(w1,
for epoch in tqdm(range(num_epoch), desc="Training", leave=False):
w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
losses.append(loss)
# Compute accuracy
predictions = 1 / (1 + np.exp(-np.matmul(1 / (1 + np.exp(-np.matmul(data_train, w1) - b1)), w2) - b2))
a0 = data_train # Input to the first layer
z1 = np.matmul(a0, w1) + b1 # Input to the hidden layer
a1 = sigmoid(z1) # Output of the hidden layer
z2 = np.matmul(a1, w2) + b2 # Input to the output layer
a2 = softmax(z2) # Output of the output layer
predictions = a2
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(labels_train, axis=1)
accuracy = np.mean(predicted_classes == true_classes)
train_accuracies.append(accuracy)
if verbose:
tqdm.write(f"Epoch {epoch + 1}/{num_epoch} - Loss: {loss:.4f} - Accuracy: {accuracy:.4f}") # We modify the text to display the loss with tqdm.write
tqdm.write(f"Epoch {epoch + 1}/{num_epoch}: Loss = {loss:.4f}, Accuracy = {accuracy * 100:.2f}%")
if return_loss:
return w1, b1, w2, b2, train_accuracies, losses
@@ -387,6 +418,7 @@ def run_mlp_training(data_train,
if return_loss:
return train_accuracies, test_accuracy, losses
return train_accuracies, test_accuracy
def Z_score_normalize(data_train, data_test):
......
@@ -3,6 +3,7 @@
import numpy as np
def split_dataset(data, labels, split):
"""
Split the dataset into a training set and a test set.
......
# Author : Pierre
# Date : 2024
import pytest
@pytest.fixture
def global_params():
return {
"nb_of_train_samples": 10,
"nb_of_test_samples": 10,
"dim": 3072,
"int_min": 0,
"int_max": 10,
"seed":45,
"d_h":64,
"learning_rate":0.01,
"num_epoch":5,
}
\ No newline at end of file
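A minimal sketch of a test that could consume this fixture (the test itself is hypothetical and not part of this commit; the import path of `learn_once_cross_entropy` and the use of 10 classes are assumptions):

```python
# tests/test_mlp.py -- hypothetical example of using the global_params fixture
import numpy as np
from mlp import learn_once_cross_entropy  # assumed import path

def test_learn_once_cross_entropy_keeps_shapes(global_params):
    p = global_params
    rng = np.random.default_rng(p["seed"])
    n, dim, d_h, n_classes = p["nb_of_train_samples"], p["dim"], p["d_h"], 10

    # Random data and one-hot labels with the dimensions given by the fixture.
    data = rng.normal(size=(n, dim))
    labels = np.eye(n_classes)[rng.integers(0, n_classes, size=n)]
    w1 = rng.normal(0, 0.01, size=(dim, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0, 0.01, size=(d_h, n_classes))
    b2 = np.zeros((1, n_classes))

    w1, b1, w2, b2, loss = learn_once_cross_entropy(
        w1, b1, w2, b2, data, labels, p["learning_rate"])

    # The update must preserve shapes and return a finite scalar loss.
    assert w1.shape == (dim, d_h) and w2.shape == (d_h, n_classes)
    assert np.isfinite(loss)
```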