Commit 167ea132 authored by pierre-cau

mlp

parent 6b34bc72
Showing with 680 additions and 21 deletions
@@ -22,6 +22,8 @@ pip install -r requirements.txt
The project is divided into the following directories:
- `data/`: Contains the dataset.
- `notebooks/`: Contains all the notebooks of the project.
- `tests/`: Contains all the tests of the code.
- `src/`: Contains the source code of the project.
- `src/utils/`: Contains utility functions such as `read_cifar` or `evaluate_knn`.
- `results/`: Contains the main images and linked files of the project.
@@ -91,7 +93,9 @@ Using all these equations, I have coded some methods in the `mlp.py` file to train
Thus, for `split_factor=0.9`, `d_h=64`, `learning_rate=0.1` and `num_epoch=100`, we obtain the following curves:
![mlp_split_0.1](results/mlp_2.png)
![mlp_split_0.1](results/mlp_1.png)
Here we observe that the accuracy increases steadily, epoch after epoch, although it remains modest. At the end, we reach about 23% accuracy on both the train and test sets. This means that the algorithm is neither underfitted nor overfitted. Both the loss and the train accuracy seem quite stable at the end, which implies that the algorithm has finished its learning.
>Here we observe that the accuracy increases steadily, epoch after epoch, although it remains modest. At the end, we reach about 27% accuracy on both the train and test sets. This means that the algorithm is neither underfitted nor overfitted. Both the loss and the train accuracy seem quite stable at the end, which implies that the algorithm has finished its learning.
Nonetheless, the accuracy is still very low and the algorithm can easily diverge because of exponentially large values, leading to overflows. To counter this phenomenon, I chose to initialize the weights as small as possible while keeping them random. I also introduced some `np.clip` calls and an epsilon term to avoid overflows and division by zero, respectively.
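As a rough sketch of these safeguards (the variable names and shapes below are illustrative assumptions, not the exact code of `mlp.py`), the small random initialization, the clipping and the epsilon could look like this:

```python
import numpy as np

rng = np.random.default_rng(0)
epsilon = 1e-15  # guards np.log against zero probabilities

# Tiny random weights keep the first pre-activations small, away from the
# saturated regions of the sigmoid and from exponential overflow.
w1 = rng.normal(0, 0.01, size=(3072, 64))  # input -> hidden (CIFAR images are 3072-dimensional)
b1 = np.zeros((1, 64))

def safe_sigmoid(x):
    # Clipping the pre-activation bounds np.exp and prevents overflow warnings.
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def safe_cross_entropy(y, y_pred):
    # The epsilon avoids log(0) when a predicted probability collapses to zero.
    return -np.sum(y * np.log(y_pred + epsilon)) / float(y_pred.shape[0])
```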
![lr_comparaison](results/learning_rate_comparaison.png)
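A learning-rate comparison of this kind could be reproduced with a loop such as the one below. This is only a sketch: random placeholder data stands in for CIFAR-10, the import path of `learn_once_cross_entropy` is an assumption, and only its signature as it appears in the `mlp.py` diff further down is relied upon.

```python
import numpy as np
import matplotlib.pyplot as plt
from mlp import learn_once_cross_entropy  # assumed import path

rng = np.random.default_rng(42)
n, d_in, d_h, n_classes, num_epoch = 200, 3072, 64, 10, 100

# Placeholder data: random inputs and one-hot labels instead of CIFAR-10.
data = rng.normal(size=(n, d_in))
labels = np.eye(n_classes)[rng.integers(0, n_classes, size=n)]

for lr in (0.01, 0.1, 0.5):
    # Tiny random initialization, as discussed above.
    w1 = rng.normal(0, 0.01, size=(d_in, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0, 0.01, size=(d_h, n_classes))
    b2 = np.zeros((1, n_classes))
    losses = []
    for _ in range(num_epoch):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, labels, lr)
        losses.append(loss)
    plt.plot(losses, label=f"lr={lr}")

plt.xlabel("epoch")
plt.ylabel("cross-entropy loss")
plt.legend()
plt.show()
```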
Binary image files changed in `results/`:
- `learning_rate__dh__comparaison.png` (101 KiB)
- `learning_rate_comparaison.png` (120 KiB)
- `mlp.png` (59.4 KiB)
- `mlp_1.png` (replaced: 45.1 KiB / 40.1 KiB)
- `mlp_2.png` (replaced: 52.4 KiB / 40 KiB)
- `training.png` (197 KiB)
# Author : Pierre CAU
# Date : 2024
from utils import *
from tqdm import tqdm
import matplotlib.pyplot as plt
@@ -46,7 +49,7 @@ if __name__ == "__main__":
# Parameters
split_factor = 0.9
d_h = 64
learning_rate = 0.2
learning_rate = 0.1
num_epoch = 100
# Split the dataset
......
# Author : Pierre CAU
# Date : 2024
import numpy as np
......
@@ -5,6 +5,16 @@ import numpy as np
from tqdm import tqdm
def sigmoid(x):
"""Calculation of the sigmoid function for a numpy array"""
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
"""Calculation of the sigmoid derivative function for a numpy array """
return sigmoid(x) * (1 - sigmoid(x))
def softmax(x):
"""
Return the softmax function of the x array
@@ -83,10 +93,10 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500)
a1 = 1 / (1 + np.exp(-z1))
a1 = sigmoid(z1)
z2 = np.matmul(a1, w2) + b2
z2 = np.clip(z2, -500, 500)
a2 = 1 / (1 + np.exp(-z2))
a2 = sigmoid(z2)
predictions = a2
# Compute loss (MSE)
@@ -94,13 +104,13 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
# Backward pass
d_loss_a2 = 2 * (predictions - targets) / targets.shape[0]
d_a2_z2 = a2 * (1 - a2)
d_a2_z2 = sigmoid_derivative(z2)
d_loss_z2 = d_loss_a2 * d_a2_z2
d_loss_w2 = np.matmul(a1.T, d_loss_z2)
d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True)
d_loss_a1 = np.matmul(d_loss_z2, w2.T)
d_a1_z1 = a1 * (1 - a1)
d_a1_z1 = sigmoid_derivative(z1)
d_loss_z1 = d_loss_a1 * d_a1_z1
d_loss_w1 = np.matmul(a0.T, d_loss_z1)
@@ -114,6 +124,16 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
return w1, b1, w2, b2, loss
def cross_entropy(y, y_pred):
"""
Function that calculates the cross entropy between predicted
and target values
"""
epsilon = 10 ** (-15)
return -np.sum(y * np.log(y_pred + epsilon)) / float(y_pred.shape[0])
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
"""
Perform one iteration of the training loop for a simple MLP with binary cross-entropy loss.
@@ -157,7 +177,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500) # Avoid overflow
a1 = 1 / (1 + np.exp(-z1))
a1 = sigmoid(z1)
z2 = np.matmul(a1, w2) + b2
z2 = np.clip(z2, -500, 500) # Avoid overflow
a2 = softmax(z2)
@@ -175,6 +195,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
# Compute loss (binary cross-entropy)
# print(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
loss = -np.mean(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
if np.isnan(loss):
# print(labels_train)
# print(predictions)
@@ -183,18 +204,20 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
raise ValueError("Loss is NaN → Try reducing the learning rate or normalizing the data.")
# Backward pass
d_loss_a2 = predictions - labels_train
d_a2_z2 = a2 * (1 - a2)
d_loss_z2 = d_loss_a2 * d_a2_z2
loss = cross_entropy(labels_train,predictions)
d_loss_z2 = (a2 - labels_train) / data.shape[0]
# Layer 2
d_loss_w2 = np.matmul(a1.T,d_loss_z2 )
d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True)
d_loss_b2 = np.sum(d_loss_z2, axis=0)
# Layer 1
d_loss_a1 = np.matmul(d_loss_z2 , w2.T)
d_a1_z1 = a1 * (1 - a1)
d_loss_z1 = d_loss_a1 * d_a1_z1
d_loss_z1 = d_loss_a1 * sigmoid_derivative(z1)
d_loss_w1 = np.matmul(a0.T,d_loss_z1 )
d_loss_b1 = np.sum(d_loss_z1, axis=0, keepdims=True)
d_loss_b1 = np.sum(d_loss_z1, axis=0)
# Update weights and biases
w1 -= learning_rate * d_loss_w1
@@ -202,6 +225,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
w2 -= learning_rate * d_loss_w2
b2 -= learning_rate * d_loss_b2
# print(w1, b1, w2, b2, loss)
return w1, b1, w2, b2, loss
@@ -259,15 +283,22 @@ def train_mlp(w1,
for epoch in tqdm(range(num_epoch), desc="Training", leave=False):
w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
losses.append(loss)
# Compute accuracy
predictions = 1 / (1 + np.exp(-np.matmul(1 / (1 + np.exp(-np.matmul(data_train, w1) - b1)), w2) - b2))
a0 = data_train # Input to the first layer
z1 = np.matmul(a0, w1) + b1 # Input to the hidden layer
a1 = sigmoid(z1) # Output of the hidden layer
z2 = np.matmul(a1, w2) + b2 # Input to the output layer
a2 = softmax(z2) # Output of the output layer
predictions = a2
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(labels_train, axis=1)
accuracy = np.mean(predicted_classes == true_classes)
train_accuracies.append(accuracy)
if verbose:
tqdm.write(f"Epoch {epoch + 1}/{num_epoch} - Loss: {loss:.4f} - Accuracy: {accuracy:.4f}") # We modify the text to display the loss with tqdm.write
tqdm.write(f"Epoch {epoch + 1}/{num_epoch}: Loss = {loss:.4f}, Accuracy = {accuracy * 100:.2f}%")
if return_loss:
return w1, b1, w2, b2, train_accuracies, losses
@@ -387,6 +418,7 @@ def run_mlp_training(data_train,
if return_loss:
return train_accuracies, test_accuracy, losses
return train_accuracies, test_accuracy
def Z_score_normalize(data_train, data_test):
......
@@ -3,6 +3,7 @@
import numpy as np
def split_dataset(data, labels, split):
"""
Split the dataset into a training set and a test set.
......
# Author : Pierre
# Date : 2024
import pytest
@pytest.fixture
def global_params():
return {
"nb_of_train_samples": 10,
"nb_of_test_samples": 10,
"dim": 3072,
"int_min": 0,
"int_max": 10,
"seed":45,
"d_h":64,
"learning_rate":0.01,
"num_epoch":5,
}
\ No newline at end of file
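A minimal sketch of a test that could consume this fixture (the test itself is hypothetical and not part of this commit; the import path of `learn_once_cross_entropy` and the use of 10 classes are assumptions):

```python
# tests/test_mlp.py -- hypothetical example of using the global_params fixture
import numpy as np
from mlp import learn_once_cross_entropy  # assumed import path

def test_learn_once_cross_entropy_keeps_shapes(global_params):
    p = global_params
    rng = np.random.default_rng(p["seed"])
    n, dim, d_h, n_classes = p["nb_of_train_samples"], p["dim"], p["d_h"], 10

    # Random data and one-hot labels with the dimensions given by the fixture.
    data = rng.normal(size=(n, dim))
    labels = np.eye(n_classes)[rng.integers(0, n_classes, size=n)]
    w1 = rng.normal(0, 0.01, size=(dim, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0, 0.01, size=(d_h, n_classes))
    b2 = np.zeros((1, n_classes))

    w1, b1, w2, b2, loss = learn_once_cross_entropy(
        w1, b1, w2, b2, data, labels, p["learning_rate"])

    # The update must preserve shapes and return a finite scalar loss.
    assert w1.shape == (dim, d_h) and w2.shape == (d_h, n_classes)
    assert np.isfinite(loss)
```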