From 385f8348b0ae1d68ab42af73c2875ee870baa398 Mon Sep 17 00:00:00 2001
From: selalimi <saraelalami2001@gmail.com>
Date: Sat, 4 Nov 2023 23:08:01 -0400
Subject: [PATCH] Add mlp.py

---
 mlp.py | 353 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 353 insertions(+)
 create mode 100644 mlp.py

diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000..17ed264
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,353 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import plotly.express as px
+import plotly.io as pio
+
+
+N = 30               # number of input samples
+d_in = 3             # input dimension
+d_h = 3              # number of neurons in the hidden layer
+d_out = 2            # output dimension (number of neurons in the output layer)
+learning_rate = 0.1  # learning rate for gradient descent
+num_epochs = 100     # number of training epochs
+
+# Random initialization of the network weights and biases
+def initialization(d_in, d_h, d_out):
+    np.random.seed(10)                       # fixed seed to get reproducible random values
+    W1 = 2 * np.random.rand(d_in, d_h) - 1   # first layer weights
+    b1 = np.zeros((1, d_h))                  # first layer biases
+    W2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))                # second layer biases
+    return W1, b1, W2, b2
+
+data = np.random.rand(N, d_in)      # create random input data
+targets = np.random.rand(N, d_out)  # create random targets
+
+# Define the sigmoid activation function.
+# When derivate=True, x is expected to already be the sigmoid output.
+def sigmoid(x, derivate):
+    if derivate == False:
+        return 1 / (1 + np.exp(-x))
+    else:
+        return x * (1 - x)
+
+
+# Define the softmax activation function.
+# When derivate=True, x is expected to already be the softmax output; only the
+# diagonal of the softmax Jacobian is used.
+def softmax(x, derivate):
+    if derivate == False:
+        return np.exp(x) / np.exp(np.array(x)).sum(axis=-1, keepdims=True)
+    else:
+        return x * (1 - x)
+
+# Define the loss metrics:
+def loss_metrics(predictions, targets, metric, status):
+    if metric == "MSE":
+        if status == "forward":
+            return np.mean((predictions - targets) ** 2)
+        elif status == "backward":
+            return 2 * (predictions - targets) / len(predictions)  # Gradient of the MSE loss
+    elif metric == "BCE":
+        # Binary Cross-Entropy loss
+        epsilon = 1e-15  # Small constant to prevent log(0)
+        predictions = np.clip(predictions, epsilon, 1 - epsilon)
+        if status == "forward":
+            return - (targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)).mean()
+        elif status == "backward":
+            return (predictions - targets) / ((1 - predictions) * predictions)  # Gradient of the BCE loss
+    else:
+        raise ValueError("Metric not supported: " + metric)
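+
+# A minimal usage sketch (illustrative only, not used by the training code below):
+# with status="forward" loss_metrics returns the scalar loss, with status="backward"
+# it returns the gradient with respect to the predictions. For example:
+#   loss_metrics(np.array([[0.8, 0.2]]), np.array([[1.0, 0.0]]), "MSE", "forward")
+#   -> 0.04
+#   loss_metrics(np.array([[0.8, 0.2]]), np.array([[1.0, 0.0]]), "MSE", "backward")
+#   -> array([[-0.4,  0.4]])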
+
+# learn_once_mse
+def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
+    """
+    Update the weights and biases of the network for one gradient descent step
+    using the Mean Squared Error (MSE) loss.
+
+    Parameters:
+    - W1: Weight matrix of the first layer (shape: d_in x d_h).
+    - b1: Bias vector of the first layer (shape: 1 x d_h).
+    - W2: Weight matrix of the second layer (shape: d_h x d_out).
+    - b2: Bias vector of the second layer (shape: 1 x d_out).
+    - data: Input data matrix (shape: batch_size x d_in).
+    - targets: Target output matrix (shape: batch_size x d_out).
+    - learning_rate: Learning rate for gradient descent.
+
+    Returns:
+    - updated_W1: Updated weight matrix of the first layer.
+    - updated_b1: Updated bias vector of the first layer.
+    - updated_W2: Updated weight matrix of the second layer.
+    - updated_b2: Updated bias vector of the second layer.
+    - loss: Mean Squared Error (MSE) loss for monitoring.
+    """
+    # Forward pass
+    # Calculate the input and output of the hidden layer
+    hidden_layer_input = np.matmul(data, W1) + b1
+    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)  # Apply the sigmoid activation
+
+    # Calculate the input and output of the output layer
+    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
+    output_layer_output = softmax(output_layer_input, derivate=False)  # Apply the softmax activation
+
+    # Backpropagation phase
+    # Calculate the error at the output layer
+    output_error = output_layer_output - targets
+
+    # Calculate gradients for the output layer
+    output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
+
+    # Update weights and biases of the output layer (gradients averaged over the batch)
+    updated_W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
+    updated_b2 = b2 - learning_rate * output_layer_gradients.sum(axis=0, keepdims=True) / data.shape[0]
+
+    # Calculate the error at the hidden layer
+    hidden_layer_error = np.dot(output_layer_gradients, W2.T)
+
+    # Calculate gradients for the hidden layer
+    hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
+
+    # Update weights and biases of the hidden layer (gradients averaged over the batch)
+    updated_W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
+    updated_b1 = b1 - learning_rate * hidden_layer_gradients.sum(axis=0, keepdims=True) / data.shape[0]
+
+    # Calculate the loss using the MSE metric
+    loss = loss_metrics(output_layer_output, targets, metric="MSE", status="forward")
+
+    return updated_W1, updated_b1, updated_W2, updated_b2, loss
+
+# One-hot encoding
+def one_hot(targets):
+    """
+    one_hot takes an array of target values and returns the corresponding
+    one-hot encoded matrix.
+
+    Parameters:
+    - targets: An array of target values.
+
+    Returns:
+    - one_hot_matrix: A one-hot encoded matrix where each row corresponds to a target value.
+    """
+    num_classes = np.unique(targets).shape[0]  # Number of unique classes in the target array
+    num_samples = targets.shape[0]             # Number of samples in the target array
+
+    one_hot_matrix = np.zeros((num_samples, num_classes))  # Initialize a matrix of zeros
+
+    for i in range(num_samples):
+        target_class = int(targets[i])
+        one_hot_matrix[i, target_class] = 1  # Set the corresponding class index to 1
+
+    return one_hot_matrix
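+
+# A minimal usage sketch (illustrative only): integer class labels are mapped to
+# one-hot rows, e.g.
+#   one_hot(np.array([0, 2, 1]))
+#   -> array([[1., 0., 0.],
+#             [0., 0., 1.],
+#             [0., 1., 0.]])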
+
+# learn_once_cross_entropy
+def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
+    """
+    Perform one gradient descent step using the binary cross-entropy loss.
+
+    Parameters:
+    - W1, b1, W2, b2: Weights and biases of the network.
+    - data: Input data matrix of shape (batch_size x d_in).
+    - targets: Target output matrix of shape (batch_size x d_out).
+    - learning_rate: Learning rate for gradient descent.
+
+    Returns:
+    - Updated weights and biases (W1, b1, W2, b2) of the network.
+    - Loss value for monitoring.
+    """
+    # Forward pass
+    # Feedforward propagation on the hidden layer
+    Z1 = np.matmul(data, W1) + b1
+    A1 = sigmoid(Z1, derivate=False)  # Apply the sigmoid activation function
+
+    # Feedforward propagation on the output layer
+    Z2 = np.matmul(A1, W2) + b2
+    A2 = softmax(Z2, derivate=False)  # Apply the softmax activation function
+
+    # Backpropagation phase
+    batch_size = data.shape[0]
+
+    # Gradients for W2 and b2
+    E2 = A2 - targets
+    dZ2 = E2 * softmax(A2, derivate=True)
+    W2_update = np.dot(A1.T, dZ2) / batch_size
+    update_b2 = dZ2.sum(axis=0, keepdims=True) / batch_size
+
+    # Gradients for W1 and b1
+    E1 = np.dot(dZ2, W2.T)
+    dZ1 = E1 * sigmoid(A1, derivate=True)
+    W1_update = np.dot(data.T, dZ1) / batch_size
+    update_b1 = dZ1.sum(axis=0, keepdims=True) / batch_size
+
+    # Gradient descent
+    W2 = W2 - learning_rate * W2_update
+    W1 = W1 - learning_rate * W1_update
+    b2 = b2 - learning_rate * update_b2
+    b1 = b1 - learning_rate * update_b1
+
+    # Compute the loss (Binary Cross-Entropy)
+    loss = loss_metrics(A2, targets, metric="BCE", status="forward")
+
+    return W1, b1, W2, b2, loss
+
+def calculate_accuracy(predictions, actual_values):
+    """
+    calculate_accuracy: Compute the accuracy of the model.
+
+    Parameters:
+    - predictions: Predicted values.
+    - actual_values: Ground truth observations (one-hot encoded).
+
+    Returns:
+    - Accuracy as a float.
+    """
+    correct_predictions = predictions.argmax(axis=1) == actual_values.argmax(axis=1)
+    accuracy = correct_predictions.mean()
+    return accuracy
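+
+# A minimal usage sketch (illustrative only): both arguments are compared by their
+# argmax along axis 1, so predictions may be raw scores or probabilities and the
+# ground truth one-hot vectors, e.g.
+#   calculate_accuracy(np.array([[0.9, 0.1], [0.6, 0.4]]),
+#                      np.array([[1, 0], [0, 1]]))
+#   -> 0.5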
+
+def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
+    """
+    Perform one training step (one epoch of full-batch gradient descent); the
+    epoch loop itself is handled by run_mlp_training below.
+
+    Parameters:
+    - W1, b1, W2, b2: Weights and biases of the network.
+    - data: Input data matrix of shape (batch_size x d_in).
+    - targets: Target output matrix of shape (batch_size x d_out), one-hot encoded.
+    - learning_rate: Learning rate for gradient descent.
+
+    Returns:
+    - Updated weights and biases (W1, b1, W2, b2) of the network.
+    - Loss value (Binary Cross-Entropy) for monitoring.
+    - Training accuracy for this step as a float.
+    """
+    # Forward pass
+    hidden_layer_input = np.matmul(data, W1) + b1
+    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
+
+    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
+    output_layer_output = softmax(output_layer_input, derivate=False)
+
+    N = data.shape[0]
+
+    # Backpropagation phase
+    output_error = output_layer_output - targets
+    output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
+
+    W2_update = np.dot(hidden_layer_output.T, output_layer_gradients) / N
+    update_b2 = output_layer_gradients.sum(axis=0, keepdims=True) / N
+
+    hidden_layer_error = np.dot(output_layer_gradients, W2.T)
+    hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
+
+    W1_update = np.dot(data.T, hidden_layer_gradients) / N
+    update_b1 = hidden_layer_gradients.sum(axis=0, keepdims=True) / N
+
+    # Gradient descent
+    W2 = W2 - learning_rate * W2_update
+    W1 = W1 - learning_rate * W1_update
+    b2 = b2 - learning_rate * update_b2
+    b1 = b1 - learning_rate * update_b1
+
+    # Calculate loss and accuracy
+    loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")
+    train_accuracy = calculate_accuracy(output_layer_output, targets)
+
+    return W1, b1, W2, b2, loss, train_accuracy
+
+def test_mlp(W1, b1, W2, b2, data_test, labels_test):
+    """
+    Evaluate the network's performance on the test set.
+
+    Parameters:
+    - W1, b1, W2, b2: Weights and biases of the network.
+    - data_test: Test data matrix of shape (batch_size x d_in).
+    - labels_test: True labels for the test data (one-hot encoded).
+
+    Returns:
+    - test_accuracy: The testing accuracy as a float.
+    """
+    # Forward pass
+    hidden_layer_input = np.matmul(data_test, W1) + b1
+    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
+
+    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
+    output_layer_output = softmax(output_layer_input, derivate=False)
+
+    # Compute testing accuracy
+    test_accuracy = calculate_accuracy(output_layer_output, labels_test)
+    return test_accuracy
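+
+# A minimal usage sketch (illustrative only, reusing the toy arrays defined at the
+# top of this file as both "train" and "test" sets; the *_s names are hypothetical):
+#   W1_s, b1_s, W2_s, b2_s = initialization(d_in, d_h, d_out)
+#   W1_s, b1_s, W2_s, b2_s, loss_s, acc_s = train_mlp(W1_s, b1_s, W2_s, b2_s,
+#                                                     data, targets, learning_rate)
+#   test_acc_s = test_mlp(W1_s, b1_s, W2_s, b2_s, data, targets)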
+
+def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_units, learning_rate, num_epochs):
+    """
+    Train an MLP classifier and evaluate its performance.
+
+    Parameters:
+    - X_train: Training data matrix of shape (batch_size x input_dimension).
+    - labels_train: True labels for the training data.
+    - data_test: Test data matrix of shape (batch_size x input_dimension).
+    - labels_test: True labels for the test data.
+    - num_hidden_units: Number of neurons in the hidden layer.
+    - learning_rate: The learning rate for gradient descent.
+    - num_epochs: The number of training epochs.
+
+    Returns:
+    - train_accuracies: List of training accuracies across epochs.
+    - test_accuracy: The final testing accuracy.
+    """
+    input_dimension = X_train.shape[1]
+    output_dimension = np.unique(labels_train).shape[0]  # Number of classes
+
+    # Initialize weights and biases
+    W1, b1, W2, b2 = initialization(input_dimension, num_hidden_units, output_dimension)
+
+    train_accuracies = []  # List to store training accuracies
+
+    # Training loop
+    for epoch in range(num_epochs):
+        W1, b1, W2, b2, loss, train_accuracy = train_mlp(W1, b1, W2, b2, X_train, one_hot(labels_train), learning_rate)
+        test_accuracy = test_mlp(W1, b1, W2, b2, data_test, one_hot(labels_test))
+        train_accuracies.append(train_accuracy)
+
+        print("Epoch {}/{}".format(epoch + 1, num_epochs))
+        print("Train Accuracy: {:.6f} Test Accuracy: {:.6f}".format(train_accuracy, test_accuracy))
+
+    return train_accuracies, test_accuracy
+
+# plot_ANN
+def plot_ANN(X_train, y_train, X_test, y_test):
+    """
+    Plot the variation of training accuracy over the epochs.
+
+    Parameters:
+    - X_train: Training data matrix.
+    - y_train: True labels for the training data.
+    - X_test: Test data matrix.
+    - y_test: True labels for the test data.
+    """
+    # Train an MLP and obtain the training accuracies and final test accuracy
+    train_accuracies, test_accuracy = run_mlp_training(X_train, y_train, X_test, y_test, num_hidden_units=64, learning_rate=0.1, num_epochs=100)
+
+    # Display the test accuracy
+    print("Test Set Accuracy: {}".format(test_accuracy))
+
+    # Create a Matplotlib plot
+    plt.plot(list(range(1, len(train_accuracies) + 1)), train_accuracies)
+    plt.title('Accuracy Variation Over Epochs')
+    plt.xlabel('Epoch')
+    plt.ylabel('Accuracy')
+
+    # Save the figure (optional; the Results/ directory must already exist)
+    plt.savefig("Results/mlp.png")
+
+    # Show the plot (optional)
+    plt.show()
--
GitLab