Commit 6b34bc72 authored by pierre-cau's avatar pierre-cau

readme mlp

parent 523a93f9
@@ -24,7 +24,7 @@ The project is divided into the following directories:
- `data/`: Contains the dataset.
- `src/`: Contains the source code of the project.
- `src/utils/`: Contains utility functions such as `read_cifar` or `evaluate_knn`.
- `assets/`: Contains the main images and linked files of the project.
- `results/`: Contains the main images and linked files of the project.
- `src/main.py`: Main file of the project.
___
@@ -83,3 +83,15 @@ $\frac{\partial C}{\partial Z^{(1)}} = \frac{\partial C}{\partial A^{(1)}} \odot A^{(1)} \odot (1 - A^{(1)})$
8. The gradient of the cost function with respect to the biases of the first layer, $B^{(1)}$, is given by:
$\frac{\partial C}{\partial B^{(1)}} = \frac{\partial C}{\partial Z^{(1)}}$
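For illustration, here is a minimal standalone NumPy sketch of equations 7 and 8. The toy shapes, the random stand-in for $\frac{\partial C}{\partial A^{(1)}}$ and the batch accumulation of the bias gradient are assumptions made for this example, not code taken from `mlp.py`:

```python
import numpy as np

rng = np.random.default_rng(0)
batch, d_in, d_h = 4, 3, 5                    # toy sizes
a0 = rng.normal(size=(batch, d_in))           # input A^(0)
w1 = rng.normal(size=(d_in, d_h)) * 0.01      # small random weights
b1 = np.zeros((1, d_h))

z1 = a0 @ w1 + b1                             # Z^(1)
a1 = 1 / (1 + np.exp(-z1))                    # A^(1) = sigmoid(Z^(1))
d_loss_a1 = rng.normal(size=(batch, d_h))     # stand-in for dC/dA^(1) coming from the layer above

d_loss_z1 = d_loss_a1 * a1 * (1 - a1)              # eq. 7: dC/dZ^(1) = dC/dA^(1) * A^(1) * (1 - A^(1))
d_loss_b1 = d_loss_z1.sum(axis=0, keepdims=True)   # eq. 8, accumulated over the batch
d_loss_w1 = a0.T @ d_loss_z1                       # and dC/dW^(1) = a0.T @ dC/dZ^(1) (up to batch averaging)
```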
____
### Results
Using all these equations, I have coded several methods in the `mlp.py` file to train the neural network, especially `run_mlp_training`.
Thus, for `split_factor=0.9`, `d_h=64`, `learning_rate=0.1` and `num_epoch=100`, we obtain the following curves:
![mlp_split_0.1](results/mlp_2.png)
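For reference, the pipeline behind this figure is essentially the one in `src/main.py`; a condensed sketch, using the function names from this repository and the hyperparameters quoted above, and assuming `data` and `labels` have already been loaded with `read_cifar`:

```python
# Condensed training pipeline (structured like src/main.py)
data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
data_train, data_test = Z_score_normalize(data_train, data_test)         # stats fitted on the training set
labels_train, labels_test = one_hot(labels_train), one_hot(labels_test)  # one-hot targets

train_accuracies, test_accuracy, losses = run_mlp_training(
    data_train=data_train, labels_train=labels_train,
    data_test=data_test, labels_test=labels_test,
    d_h=64, learning_rate=0.1, num_epoch=100,
    return_loss=True, verbose=True,
)
```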
Here we observe that the accuracy increases epoch after epoch, but remains modest. At the end, we reach about 23% accuracy on both the training and test sets, which means that the model is neither underfitted nor overfitted. Both the loss and the training accuracy are quite stable at the end, which suggests that the model has finished learning.
Nonetheless, the accuracy is still very low and the algorithm can easily diverge because of exponential values causing overflows. To counter this phenomenon, I chose to initialize the weights with very small, yet still random, values. I also introduced some `np.clip` calls and an epsilon to avoid overflows and division by zero, respectively.
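Concretely, these tricks look like the following minimal standalone sketch on toy data. The ±500 clipping bounds, the 0.01 weight scale and `epsilon = 1e-10` are the values used in `mlp.py`; the single sigmoid layer and the toy shapes are only for illustration:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(8, 3072))                   # toy batch of CIFAR-sized inputs
y = np.eye(10)[rng.integers(0, 10, size=8)]      # toy one-hot targets

# 1) tiny random weights, zero biases (same scheme as init_params)
w = (2 * rng.random((3072, 10)) - 1) * 0.01
b = np.zeros((1, 10))

# 2) clip the pre-activations so np.exp cannot overflow
z = np.clip(x @ w + b, -500, 500)
a = 1 / (1 + np.exp(-z))

# 3) keep the predictions away from exactly 0 or 1 so np.log stays finite
epsilon = 1e-10
a = np.clip(a, epsilon, 1 - epsilon)
loss = -np.mean(y * np.log(a) + (1 - y) * np.log(1 - a))
print(f"toy loss: {loss:.4f}")
```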
\ No newline at end of file
File moved
results/mlp.png (59.4 KiB)
results/mlp_1.png (45.1 KiB)
results/mlp_2.png (52.4 KiB)

@@ -46,27 +46,56 @@ if __name__ == "__main__":
# Parameters
split_factor = 0.9
d_h = 64
learning_rate = 0.1
learning_rate = 0.2
num_epoch = 100
# Split the dataset
data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
# Normalize the data
data_train, data_test = Z_score_normalize(data_train, data_test)
# Ensure labels are one-hot encoded
labels_train = one_hot(labels_train)
labels_test = one_hot(labels_test)
# Run MLP training
train_accuracies, test_accuracies = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
train_accuracies, test_accuracies, losses = run_mlp_training(
data_train=data_train,
labels_train=labels_train,
data_test=data_test,
labels_test=labels_test,
d_h=d_h,
learning_rate=learning_rate,
num_epoch=num_epoch,
return_loss=True,
verbose=True
)
print(f"Final test accuracy: {test_accuracies}")
print(f"Final train accuracy: {train_accuracies[-1]}")
# Plot the evolution of learning accuracy
plt.figure()
plt.plot(range(num_epoch), train_accuracies, label='Train Accuracy')
plt.plot(range(num_epoch), test_accuracies, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('MLP Training Accuracy Evolution')
plt.legend()
fig, ax1 = plt.subplots()
# Plot train accuracies on the first y-axis
ax1.plot(range(num_epoch), train_accuracies, label='Train Accuracy', color='b')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Train Accuracy', color='b')
ax1.tick_params(axis='y', labelcolor='b')
ax1.xaxis.set_major_locator(plt.MaxNLocator(integer=True)) # Ensure only integer ticks on x-axis
# Create a second y-axis to plot the losses
ax2 = ax1.twinx()
ax2.plot(range(num_epoch), losses, label='Loss', color='r')
ax2.set_ylabel('Loss', color='r')
ax2.tick_params(axis='y', labelcolor='r')
# Add title and grid
plt.title(f'MLP Training Accuracy and Loss Evolution\n(d_h={d_h}, learning_rate={learning_rate}, num_epoch={num_epoch})')
fig.tight_layout()
plt.grid()
# Save and show the plot
plt.savefig('../results/mlp.png')
plt.show()
\ No newline at end of file
import numpy as np
def distance_matrix(matrix1, matrix2):
"""
Compute the L2 Euclidean distance matrix between two matrices.
......
@@ -2,6 +2,25 @@
# Date : 2024
import numpy as np
from tqdm import tqdm
def softmax(x):
"""
Return the softmax function of the x array
Parameters
----------
x : np.ndarray
input vector
Returns
-------
Softmax of x
"""
exp = np.exp(x - np.max(x,axis=1,keepdims=True))
return exp / np.sum(exp,axis=1,keepdims=True)
def one_hot(array):
"""
@@ -63,8 +82,10 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
# Forward pass
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500)
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
z2 = np.clip(z2, -500, 500)
a2 = 1 / (1 + np.exp(-z2))
predictions = a2
@@ -135,13 +156,31 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
# Forward pass
a0 = data
z1 = np.matmul(a0, w1) + b1
z1 = np.clip(z1, -500, 500) # Avoid overflow
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = 1 / (1 + np.exp(-z2))
z2 = np.clip(z2, -500, 500) # Avoid overflow
a2 = softmax(z2)
# print("\n A0 :",a0.min(), a0.max(), a0.mean(), a0.std())
# print("Z1 :",z1.min(), z1.max(), z1.mean(), z1.std())
# print("A1 :",a1.min(), a1.max(), a1.mean(), a1.std())
# print("Z2 :",z2.min(), z2.max(), z2.mean(), z2.std())
# print("A2 :",a2.min(), a2.max(), a2.mean(), a2.std())
predictions = a2
epsilon = 1e-10
predictions = np.clip(predictions, epsilon, 1 - epsilon) # Avoid log(0)
# Compute loss (binary cross-entropy)
# print(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
loss = -np.mean(labels_train * np.log(predictions) + (1 - labels_train) * np.log(1 - predictions))
if np.isnan(loss):
# print(labels_train)
# print(predictions)
# print(np.log(predictions))
# print(np.log(1 - predictions))
raise ValueError("Loss is NaN → Try reducing the learning rate or normalizing the data.")
# Backward pass
d_loss_a2 = predictions - labels_train
@@ -163,9 +202,20 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
w2 -= learning_rate * d_loss_w2
b2 -= learning_rate * d_loss_b2
# print(w1, b1, w2, b2, loss)
return w1, b1, w2, b2, loss
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
def train_mlp(w1,
b1,
w2,
b2,
data_train,
labels_train,
learning_rate,
num_epoch,
return_loss = False,
verbose = False
):
"""
Train a simple MLP for a given number of epochs.
@@ -187,6 +237,8 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
Learning rate of the optimizer.
num_epoch : int
Number of training epochs.
return_loss : bool
If True, return the loss across epochs.
Returns
-------
@@ -202,10 +254,11 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
List of training accuracies across epochs.
"""
train_accuracies = []
losses = []
for epoch in range(num_epoch):
for epoch in tqdm(range(num_epoch), desc="Training", leave=False):
w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
losses.append(loss)
# Compute accuracy
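# Inline forward pass (sigmoid at both layers); the argmax below is identical whether the output layer uses sigmoid or softmax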
predictions = 1 / (1 + np.exp(-np.matmul(1 / (1 + np.exp(-np.matmul(data_train, w1) - b1)), w2) - b2))
predicted_classes = np.argmax(predictions, axis=1)
@@ -213,8 +266,11 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
accuracy = np.mean(predicted_classes == true_classes)
train_accuracies.append(accuracy)
print(f"Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")
if verbose:
tqdm.write(f"Epoch {epoch + 1}/{num_epoch} - Loss: {loss:.4f} - Accuracy: {accuracy:.4f}") # We modify the text to display the loss with tqdm.write
if return_loss:
return w1, b1, w2, b2, train_accuracies, losses
return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
@@ -246,7 +302,7 @@ def test_mlp(w1, b1, w2, b2, data_test, labels_test):
z1 = np.matmul(a0, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = 1 / (1 + np.exp(-z2))
a2 = softmax(z2)
predictions = a2
# Compute accuracy
@@ -256,7 +312,29 @@ def test_mlp(w1, b1, w2, b2, data_test, labels_test):
return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
def init_params(n_features, n_outputs, d_h):
"""
Initialize the weights (small random values) and the biases (zeros).
"""
w1 = (2 * np.random.rand(n_features, d_h) - 1) * 0.01
b1 = np.zeros((1, d_h))
w2 = (2 * np.random.rand(d_h, n_outputs) - 1) * 0.01
b2 = np.zeros((1, n_outputs))
return w1, b1, w2, b2
def run_mlp_training(data_train,
labels_train,
data_test,
labels_test,
d_h,
learning_rate,
num_epoch,
return_loss = False,
verbose = False
):
"""
Train an MLP classifier and return the training accuracies across epochs and the final testing accuracy.
@@ -276,6 +354,10 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
Learning rate of the optimizer.
num_epoch : int
Number of training epochs.
return_loss : bool
If True, return the loss across epochs.
verbose : bool
If True, display the loss and accuracy at each epoch.
Returns
-------
@@ -287,23 +369,62 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
n_features = data_train.shape[1]
n_outputs = labels_train.shape[1]
# Initialize weights and biases
w1 = np.random.randn(n_features, d_h) * 0.01
b1 = np.zeros((1, d_h))
w2 = np.random.randn(d_h, n_outputs) * 0.01
b2 = np.zeros((1, n_outputs))
w1,b1,w2,b2 = init_params(n_features=n_features,
n_outputs=n_outputs,
d_h=d_h)
# Train the MLP
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
if return_loss:
w1, b1, w2, b2, train_accuracies, losses = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch, return_loss, verbose)
else:
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch, return_loss, verbose)
# Test the MLP
test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
if return_loss:
return train_accuracies, test_accuracy, losses
return train_accuracies, test_accuracy
def Z_score_normalize(data_train, data_test):
"""
Normalize the training and testing data.
Parameters
----------
data_train : np.ndarray
Training data of shape (n_train, d).
data_test : np.ndarray
Testing data of shape (n_test, d).
Returns
-------
data_train_normalized : np.ndarray
Normalized training data of shape (n_train, d).
data_test_normalized : np.ndarray
Normalized testing data of shape (n_test, d).
"""
# Compute the mean and standard deviation of the training data
mean = np.mean(data_train, axis=0)
std = np.std(data_train, axis=0)
# Normalize the training data
data_train_normalized = (data_train - mean) / std
# Normalize the testing data
data_test_normalized = (data_test - mean) / std
return data_train_normalized, data_test_normalized
if __name__ == "__main__":
# Test of one-hot encoding
array = np.array([0, 1, 2, 1, 0])
num_classes = 3
one_hot_matrix = one_hot(array)
print(one_hot_matrix)
\ No newline at end of file