From 565d9df21e0f9275d7be0564ec53464249c99e28 Mon Sep 17 00:00:00 2001
From: selalimi <saraelalami2001@gmail.com>
Date: Thu, 9 Nov 2023 21:16:48 -0500
Subject: [PATCH] Update mlp file

---
 mlp.py | 53 ++++++++++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/mlp.py b/mlp.py
index 17ed264..7812729 100644
--- a/mlp.py
+++ b/mlp.py
@@ -1,14 +1,12 @@
 import numpy as np
 import matplotlib.pyplot as plt
-import plotly.express as px
-import plotly.io as pio
 
 N = 30 # number of input data
 d_in = 3 # input dimension
 d_h = 3 # number of neurons in the hidden layer
 d_out = 2 # output dimension (number of neurons of the output layer)
-learning_rate = 0.1 # set the learning rate
+learning_rate = 0.1
 num_epochs=100
 
 
 # Random initialization of the network weights and biaises
@@ -70,10 +68,10 @@ def loss_metrics(predictions, targets, metric, status):
     - learning_rate: Learning rate for gradient descent.
 
     Returns:
-    - updated_W1: Updated weight matrix of the first layer.
-    - updated_b1: Updated bias vector of the first layer.
-    - updated_w2: Updated weight matrix of the second layer.
-    - updated_b2: Updated bias vector of the second layer.
+    - W1: Updated weight matrix of the first layer.
+    - b1: Updated bias vector of the first layer.
+    - w2: Updated weight matrix of the second layer.
+    - b2: Updated bias vector of the second layer.
     - loss: Mean Squared Error (MSE) loss for monitoring.
     """
 
@@ -95,8 +93,8 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
     output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
 
     # Update weights and biases of the output layer
-    updated_W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
-    updated_b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
+    W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
+    b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
 
     # Calculate the error at the hidden layer
     hidden_layer_error = np.dot(output_layer_gradients, W2.T)
@@ -105,13 +103,13 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
     hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
 
     # Update weights and biases of the hidden layer
-    updated_W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
-    updated_b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True)
+    W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
+    b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True)
 
     # Calculate the loss using the specified metric
     loss = loss_metrics(output_layer_output, targets,metric="MSE",status="forward")
 
-    return updated_W1, updated_b1, updated_W2, updated_b2, loss
+    return W1, b1, W2, b2, loss
 
 #One Hot Function :
 def one_hot(targets):
@@ -136,16 +134,16 @@ def one_hot(targets):
     return one_hot_matrix
 
 #learn_once_cross_entropy
-def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
+
+def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
     """
-    Perform one gradient descent step using binary cross-entropy loss.
+    Perform one gradient descent step using binary cross-entropy loss.
 
     Parameters:
     - W1, b1, W2, b2: Weights and biases of the network.
     - data: Input data matrix of shape (batch_size x d_in).
     - targets: Target output matrix of shape (batch_size x d_out).
     - learning_rate: Learning rate for gradient descent.
-    - metrics: Specifies the loss metric (default is Binary Cross Entropy).
 
     Returns:
     - Updated weights and biases (W1, b1, W2, b2) of the network.
@@ -154,24 +152,24 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate
 
     # Forward pass
     # Implement feedforward propagation on the hidden layer
-    Z1 = np.matmul(data, W1) + b1
-    A1 = sigmoid(Z1, derivate=False) # Apply the Sigmoid activation function
+    hidden_layer_input = np.matmul(data, W1) + b1
+    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False) # Apply the Sigmoid activation function
 
     # Implement feedforward propagation on the output layer
-    Z2 = np.matmul(A1, W2) + b2
-    A2 = softmax(Z2, derivate=False) # Apply the Softmax activation function
+    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
+    output_layer_output = softmax(output_layer_input, derivate=False) # Apply the Softmax activation function
 
     # Backpropagation phase
     # Updating W2 and b2
-    E2 = A2 - targets
-    dW2 = E2 * softmax(A2, derivate=True)
-    W2_update = np.dot(A1.T, dW2) / N
-    update_b2 = (1 / A1.shape[1]) * dW2.sum(axis=0, keepdims=True)
+    output_error = output_layer_output - targets
+    dW2 = output_error * softmax(output_layer_output, derivate=True)
+    W2_update = np.dot(hidden_layer_output.T, dW2) / data.shape[0]
+    update_b2 = (1 / hidden_layer_output.shape[1]) * dW2.sum(axis=0, keepdims=True)
 
     # Updating W1 and b1
-    E1 = np.dot(dW2, W2.T)
-    dW1 = E1 * sigmoid(A1, derivate=True)
-    W1_update = np.dot(data.T, dW1) / N
+    hidden_layer_error = np.dot(dW2, W2.T)
+    dW1 = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
+    W1_update = np.dot(data.T, dW1) / data.shape[0]
     update_b1 = (1 / data.shape[1]) * dW1.sum(axis=0, keepdims=True)
 
     # Gradient descent
@@ -181,10 +179,11 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate
     b1 = b1 - learning_rate * update_b1
 
     # Compute loss (Binary Cross Entropy)
-    loss = loss_metrics(A2, targets,metric="BCE", status="forward")
+    loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")
 
     return W1, b1, W2, b2, loss
 
+
 def calculate_accuracy(predictions, actual_values):
     """
     calculate_accuracy: Compute the accuracy of the model.
--
GitLab
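The renamed learn_once_cross_entropy keeps the same signature and return order as learn_once_mse, so callers only swap the function name. Below is a minimal, illustrative smoke test (not part of the patch) of how the updated API might be driven; it assumes sigmoid, softmax and loss_metrics exist in mlp.py as the diff context suggests, and the random data, labels and weight initialization are placeholders rather than the file's own setup.

# Hypothetical usage sketch -- assumes mlp.py exposes the functions shown in the diff.
import numpy as np
from mlp import learn_once_cross_entropy  # renamed from learn_once_binary_cross_entropy

rng = np.random.default_rng(0)
N, d_in, d_h, d_out = 30, 3, 3, 2          # mirrors the constants at the top of mlp.py
learning_rate, num_epochs = 0.1, 100

data = rng.random((N, d_in))               # input matrix, shape (batch_size x d_in)
labels = rng.integers(0, d_out, size=N)    # placeholder integer class labels
targets = np.eye(d_out)[labels]            # one-hot targets, shape (batch_size x d_out)

# Placeholder initialization; the real values come from the file's own
# "Random initialization of the network weights and biaises" block.
W1 = 2 * rng.random((d_in, d_h)) - 1
b1 = np.zeros((1, d_h))
W2 = 2 * rng.random((d_h, d_out)) - 1
b2 = np.zeros((1, d_out))

for epoch in range(num_epochs):
    # One gradient descent step; the updated parameters now come back under
    # their original names (W1, b1, W2, b2) instead of updated_* aliases.
    W1, b1, W2, b2, loss = learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate)
    if epoch % 10 == 0:
        print("epoch", epoch, "loss", loss)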