Commit 385f8348 authored by selalimi's avatar selalimi

Add mlp.py

parent 54bf1931
mlp.py 0 → 100644
import os

import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio

N = 30              # number of input samples
d_in = 3            # input dimension
d_h = 3             # number of neurons in the hidden layer
d_out = 2           # output dimension (number of neurons in the output layer)
learning_rate = 0.1 # learning rate for gradient descent
num_epochs = 100    # number of training epochs
# Random initialization of the network weights and biases
def initialization(d_in, d_h, d_out):
    np.random.seed(10)                       # fix the seed for reproducibility
    W1 = 2 * np.random.rand(d_in, d_h) - 1   # first layer weights in [-1, 1)
    b1 = np.zeros((1, d_h))                  # first layer biases
    W2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights in [-1, 1)
    b2 = np.zeros((1, d_out))                # second layer biases
    return W1, b1, W2, b2
data = np.random.rand(N, d_in)      # random input data
targets = np.random.rand(N, d_out)  # random targets
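# Quick shape check of the initialization (illustrative sketch, not part of the original
# commit; the *_demo names below are hypothetical): the returned weights and biases should
# match the layer sizes defined above.
W1_demo, b1_demo, W2_demo, b2_demo = initialization(d_in, d_h, d_out)
assert W1_demo.shape == (d_in, d_h) and b1_demo.shape == (1, d_h)
assert W2_demo.shape == (d_h, d_out) and b2_demo.shape == (1, d_out)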
# Define the sigmoid activation function
def sigmoid(x, derivate=False):
    if not derivate:
        return 1 / (1 + np.exp(-x))
    else:
        # Derivative expressed in terms of the sigmoid *output* x, i.e. s * (1 - s)
        return x * (1 - x)
# Define the softmax activation function
def softmax(x, derivate=False):
    if not derivate:
        return np.exp(x) / np.exp(np.array(x)).sum(axis=-1, keepdims=True)
    else:
        # Element-wise (diagonal) approximation of the softmax Jacobian,
        # expressed in terms of the softmax *output* x
        return x * (1 - x)
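# Minimal sanity check of the activations (illustrative sketch, not in the original file;
# the _demo names are hypothetical): the sigmoid is applied element-wise and each
# softmax row should sum to 1.
_z_demo = np.array([[0.0, 1.0, -1.0]])      # a single hypothetical pre-activation row
_s_demo = sigmoid(_z_demo, derivate=False)  # values in (0, 1)
_p_demo = softmax(_z_demo, derivate=False)  # rows sum to 1
assert np.allclose(_p_demo.sum(axis=-1), 1.0)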
# Define the loss metrics
def loss_metrics(predictions, targets, metric, status):
    if metric == "MSE":
        if status == "forward":
            return np.mean((predictions - targets) ** 2)
        elif status == "backward":
            return 2 * (predictions - targets) / len(predictions)  # gradient of the MSE loss
    elif metric == "BCE":
        # Binary cross-entropy loss
        epsilon = 1e-15  # small constant to prevent log(0)
        predictions = np.clip(predictions, epsilon, 1 - epsilon)
        if status == "forward":
            return - (targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)).mean()
        elif status == "backward":
            return (predictions - targets) / ((1 - predictions) * predictions)  # gradient of the BCE loss
    else:
        raise ValueError("Metric not supported: " + metric)
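# Toy example of the two forward loss modes (illustrative sketch, not in the original file;
# the _demo values are hypothetical): a confident, nearly correct prediction gives a small
# MSE and a somewhat larger BCE.
_pred_demo = np.array([[0.9, 0.1]])
_true_demo = np.array([[1.0, 0.0]])
_mse_demo = loss_metrics(_pred_demo, _true_demo, metric="MSE", status="forward")  # 0.01
_bce_demo = loss_metrics(_pred_demo, _true_demo, metric="BCE", status="forward")  # ~0.105
assert _mse_demo < _bce_demo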
# learn_once_mse
def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
    """
    Update the weights and biases of the network for one gradient descent step using Mean Squared Error (MSE) loss.
    Parameters:
    - W1: Weight matrix of the first layer (shape: d_in x d_h).
    - b1: Bias vector of the first layer (shape: 1 x d_h).
    - W2: Weight matrix of the second layer (shape: d_h x d_out).
    - b2: Bias vector of the second layer (shape: 1 x d_out).
    - data: Input data matrix (shape: batch_size x d_in).
    - targets: Target output matrix (shape: batch_size x d_out).
    - learning_rate: Learning rate for gradient descent.
    Returns:
    - updated_W1: Updated weight matrix of the first layer.
    - updated_b1: Updated bias vector of the first layer.
    - updated_W2: Updated weight matrix of the second layer.
    - updated_b2: Updated bias vector of the second layer.
    - loss: Mean Squared Error (MSE) loss for monitoring.
    """
    # Forward pass
    # Compute the input and output of the hidden layer
    hidden_layer_input = np.matmul(data, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)  # apply the sigmoid activation
    # Compute the input and output of the output layer
    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
    output_layer_output = softmax(output_layer_input, derivate=False)  # apply the softmax activation
    # Backpropagation phase
    # Error at the output layer
    output_error = output_layer_output - targets
    # Gradients for the output layer
    output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
    # Update weights and biases of the output layer (gradients averaged over the batch)
    updated_W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
    updated_b2 = b2 - learning_rate * output_layer_gradients.sum(axis=0, keepdims=True) / data.shape[0]
    # Error at the hidden layer
    hidden_layer_error = np.dot(output_layer_gradients, W2.T)
    # Gradients for the hidden layer
    hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
    # Update weights and biases of the hidden layer (gradients averaged over the batch)
    updated_W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
    updated_b1 = b1 - learning_rate * hidden_layer_gradients.sum(axis=0, keepdims=True) / data.shape[0]
    # Compute the loss using the MSE metric
    loss = loss_metrics(output_layer_output, targets, metric="MSE", status="forward")
    return updated_W1, updated_b1, updated_W2, updated_b2, loss
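# Illustrative single MSE step on the random data defined above (sketch, not part of the
# original commit; the underscore-prefixed names are hypothetical): the loss returned after
# one update should be a finite scalar.
_W1, _b1, _W2, _b2 = initialization(d_in, d_h, d_out)
_W1, _b1, _W2, _b2, _loss_demo = learn_once_mse(_W1, _b1, _W2, _b2, data, targets, learning_rate)
assert np.isfinite(_loss_demo)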
# One-hot encoding
def one_hot(targets):
    """
    one_hot takes an array of target values and returns the corresponding one-hot encoded matrix.
    Parameters:
    - targets: An array of integer target values.
    Returns:
    - one_hot_matrix: A one-hot encoded matrix where each row corresponds to a target value.
    """
    num_classes = np.unique(targets).shape[0]              # number of unique classes in the target array
    num_samples = targets.shape[0]                         # number of samples in the target array
    one_hot_matrix = np.zeros((num_samples, num_classes))  # initialize a matrix of zeros
    for i in range(num_samples):
        target_class = int(targets[i])
        one_hot_matrix[i, target_class] = 1                # set the corresponding class index to 1
    return one_hot_matrix
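# Small one-hot example (illustrative sketch, not in the original file; _labels_demo is
# hypothetical): integer labels [0, 2, 1] over three classes become identity rows in that order.
_labels_demo = np.array([0, 2, 1])
assert np.array_equal(one_hot(_labels_demo),
                      np.array([[1.0, 0.0, 0.0],
                                [0.0, 0.0, 1.0],
                                [0.0, 1.0, 0.0]]))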
# learn_once_cross_entropy
def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
    """
    Perform one gradient descent step using binary cross-entropy loss.
    Parameters:
    - W1, b1, W2, b2: Weights and biases of the network.
    - data: Input data matrix of shape (batch_size x d_in).
    - targets: Target output matrix of shape (batch_size x d_out).
    - learning_rate: Learning rate for gradient descent.
    Returns:
    - Updated weights and biases (W1, b1, W2, b2) of the network.
    - Loss value for monitoring.
    """
    # Forward pass
    # Feedforward propagation through the hidden layer
    Z1 = np.matmul(data, W1) + b1
    A1 = sigmoid(Z1, derivate=False)  # apply the sigmoid activation function
    # Feedforward propagation through the output layer
    Z2 = np.matmul(A1, W2) + b2
    A2 = softmax(Z2, derivate=False)  # apply the softmax activation function
    # Backpropagation phase
    batch_size = data.shape[0]
    # Gradients for W2 and b2
    E2 = A2 - targets
    dW2 = E2 * softmax(A2, derivate=True)
    W2_update = np.dot(A1.T, dW2) / batch_size
    update_b2 = dW2.sum(axis=0, keepdims=True) / batch_size
    # Gradients for W1 and b1
    E1 = np.dot(dW2, W2.T)
    dW1 = E1 * sigmoid(A1, derivate=True)
    W1_update = np.dot(data.T, dW1) / batch_size
    update_b1 = dW1.sum(axis=0, keepdims=True) / batch_size
    # Gradient descent step
    W2 = W2 - learning_rate * W2_update
    W1 = W1 - learning_rate * W1_update
    b2 = b2 - learning_rate * update_b2
    b1 = b1 - learning_rate * update_b1
    # Compute the binary cross-entropy loss
    loss = loss_metrics(A2, targets, metric="BCE", status="forward")
    return W1, b1, W2, b2, loss
def calculate_accuracy(predictions, actual_values):
    """
    calculate_accuracy: Compute the accuracy of the model.
    Parameters:
    - predictions: Predicted values (one row of class scores per sample).
    - actual_values: Ground truth observations (one-hot encoded).
    Returns:
    - Accuracy as a float.
    """
    correct_predictions = predictions.argmax(axis=1) == actual_values.argmax(axis=1)
    accuracy = correct_predictions.mean()
    return accuracy
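# Tiny accuracy example (illustrative sketch, not in the original file; the _demo arrays
# are hypothetical): with one of two predictions pointing at the wrong class, accuracy is 0.5.
_scores_demo = np.array([[0.8, 0.2], [0.3, 0.7]])
_onehot_demo = np.array([[1.0, 0.0], [1.0, 0.0]])
assert calculate_accuracy(_scores_demo, _onehot_demo) == 0.5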
def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
    """
    Perform one training step (forward pass, backpropagation and gradient descent update).
    Parameters:
    - W1, b1, W2, b2: Weights and biases of the network.
    - data: Input data matrix of shape (batch_size x d_in).
    - targets: One-hot encoded target matrix of shape (batch_size x d_out).
    - learning_rate: Learning rate for gradient descent.
    Returns:
    - Updated weights and biases (W1, b1, W2, b2) of the network.
    - Loss value for monitoring.
    - Training accuracy for this step as a float.
    """
    # Forward pass
    hidden_layer_input = np.matmul(data, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
    output_layer_output = softmax(output_layer_input, derivate=False)
    batch_size = data.shape[0]
    # Backpropagation phase
    output_error = output_layer_output - targets
    output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
    W2_update = np.dot(hidden_layer_output.T, output_layer_gradients) / batch_size
    update_b2 = output_layer_gradients.sum(axis=0, keepdims=True) / batch_size
    hidden_layer_error = np.dot(output_layer_gradients, W2.T)
    hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
    W1_update = np.dot(data.T, hidden_layer_gradients) / batch_size
    update_b1 = hidden_layer_gradients.sum(axis=0, keepdims=True) / batch_size
    # Gradient descent step
    W2 = W2 - learning_rate * W2_update
    W1 = W1 - learning_rate * W1_update
    b2 = b2 - learning_rate * update_b2
    b1 = b1 - learning_rate * update_b1
    # Compute the loss and the training accuracy
    loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")
    train_accuracy = calculate_accuracy(output_layer_output, targets)
    return W1, b1, W2, b2, loss, train_accuracy
def test_mlp(W1, b1, W2, b2, data_test, labels_test):
    """
    Evaluate the network's performance on the test set.
    Parameters:
    - W1, b1, W2, b2: Weights and biases of the network.
    - data_test: Test data matrix of shape (batch_size x d_in).
    - labels_test: One-hot encoded labels for the test data.
    Returns:
    - test_accuracy: The testing accuracy as a float.
    """
    # Forward pass
    hidden_layer_input = np.matmul(data_test, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
    output_layer_output = softmax(output_layer_input, derivate=False)
    # Compute the testing accuracy
    test_accuracy = calculate_accuracy(output_layer_output, labels_test)
    return test_accuracy
def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_units, learning_rate, num_epochs):
    """
    Train an MLP classifier and evaluate its performance.
    Parameters:
    - X_train: Training data matrix of shape (batch_size x input_dimension).
    - labels_train: True integer labels for the training data.
    - data_test: Test data matrix of shape (batch_size x input_dimension).
    - labels_test: True integer labels for the test data.
    - num_hidden_units: Number of neurons in the hidden layer.
    - learning_rate: The learning rate for gradient descent.
    - num_epochs: The number of training epochs.
    Returns:
    - train_accuracies: List of training accuracies across epochs.
    - test_accuracy: The final testing accuracy.
    """
    input_dimension = X_train.shape[1]
    output_dimension = np.unique(labels_train).shape[0]  # number of classes
    # Initialize weights and biases
    W1, b1, W2, b2 = initialization(input_dimension, num_hidden_units, output_dimension)
    train_accuracies = []  # list to store training accuracies
    # Training loop
    for epoch in range(num_epochs):
        W1, b1, W2, b2, loss, train_accuracy = train_mlp(W1, b1, W2, b2, X_train, one_hot(labels_train), learning_rate)
        test_accuracy = test_mlp(W1, b1, W2, b2, data_test, one_hot(labels_test))
        train_accuracies.append(train_accuracy)
        print("Epoch {}/{}".format(epoch + 1, num_epochs))
        print("Train Accuracy: {:.6f} Test Accuracy: {:.6f}".format(train_accuracy, test_accuracy))
    return train_accuracies, test_accuracy
# plot_ANN
def plot_ANN(X_train, y_train, X_test, y_test):
    """
    Plot the variation of the training accuracy in terms of the number of epochs.
    Parameters:
    - X_train: Training data matrix.
    - y_train: True labels for the training data.
    - X_test: Test data matrix.
    - y_test: True labels for the test data.
    """
    # Train an MLP and obtain the training accuracies and the final test accuracy
    train_accuracies, test_accuracy = run_mlp_training(X_train, y_train, X_test, y_test, num_hidden_units=64, learning_rate=0.1, num_epochs=100)
    # Display the test accuracy
    print("Test Set Accuracy: {}".format(test_accuracy))
    # Create a Matplotlib plot
    plt.plot(list(range(1, len(train_accuracies) + 1)), train_accuracies)
    plt.title('Accuracy Variation Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    # Save the figure (the output directory is created if it does not exist)
    os.makedirs("Results", exist_ok=True)
    plt.savefig("Results/mlp.png")
    # Show the plot (optional)
    plt.show()
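# Hedged end-to-end sketch (not part of the original commit): the synthetic dataset, the
# split and all *_demo names below are hypothetical and only illustrate how
# run_mlp_training is meant to be called with integer class labels.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    # Two Gaussian blobs in d_in dimensions, labelled 0 and 1
    X_blob = np.vstack([rng.normal(0.0, 1.0, (100, d_in)),
                        rng.normal(2.0, 1.0, (100, d_in))])
    y_blob = np.concatenate([np.zeros(100, dtype=int), np.ones(100, dtype=int)])
    shuffle = rng.permutation(len(y_blob))
    X_blob, y_blob = X_blob[shuffle], y_blob[shuffle]
    # Simple 75/25 train/test split
    X_train_demo, y_train_demo = X_blob[:150], y_blob[:150]
    X_test_demo, y_test_demo = X_blob[150:], y_blob[150:]
    train_acc_demo, test_acc_demo = run_mlp_training(
        X_train_demo, y_train_demo, X_test_demo, y_test_demo,
        num_hidden_units=d_h, learning_rate=learning_rate, num_epochs=num_epochs)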