Commit 565d9df2 authored by selalimi

Update mlp file

parent efa3fceb
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio

N = 30               # number of input data
d_in = 3             # input dimension
d_h = 3              # number of neurons in the hidden layer
d_out = 2            # output dimension (number of neurons of the output layer)
learning_rate = 0.1  # learning rate for gradient descent
num_epochs = 100     # number of training epochs

# Random initialization of the network weights and biases
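The initialization itself sits outside the visible hunks. A minimal sketch of what it presumably looks like, using the N, d_in, d_h, and d_out defined above; the exact scaling and the random data/targets are assumptions for illustration, not part of the commit:

# Hypothetical sketch (not from the visible diff): random weights and zero biases
W1 = 2 * np.random.rand(d_in, d_h) - 1    # shape (d_in, d_h)
b1 = np.zeros((1, d_h))                   # shape (1, d_h)
W2 = 2 * np.random.rand(d_h, d_out) - 1   # shape (d_h, d_out)
b2 = np.zeros((1, d_out))                 # shape (1, d_out)
data = np.random.rand(N, d_in)            # random input batch
targets = np.random.rand(N, d_out)        # random targets for the MSE step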
@@ -70,10 +68,10 @@ def loss_metrics(predictions, targets, metric, status):
    - learning_rate: Learning rate for gradient descent.

    Returns:
    - W1: Updated weight matrix of the first layer.
    - b1: Updated bias vector of the first layer.
    - W2: Updated weight matrix of the second layer.
    - b2: Updated bias vector of the second layer.
    - loss: Mean Squared Error (MSE) loss for monitoring.
    """
@@ -95,8 +93,8 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
    output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)

    # Calculate the error at the hidden layer (before W2 is overwritten by its update)
    hidden_layer_error = np.dot(output_layer_gradients, W2.T)

    # Update weights and biases of the output layer
    W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
    b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
@@ -105,13 +103,13 @@ def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
    hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)

    # Update weights and biases of the hidden layer
    W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
    b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True)

    # Calculate the loss using the specified metric
    loss = loss_metrics(output_layer_output, targets, metric="MSE", status="forward")

    return W1, b1, W2, b2, loss
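learn_once_mse relies on sigmoid, softmax, and loss_metrics helpers defined outside the visible hunks. The following is a minimal sketch of what such helpers typically look like; it assumes the derivate flag is applied to the already-activated layer output (which matches how the function above calls them), and it is not the commit's actual implementation:

def sigmoid(x, derivate=False):
    # When derivate=True, x is assumed to already be sigmoid(x) (the layer output)
    if derivate:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

def softmax(x, derivate=False):
    # When derivate=True, x is assumed to already be the softmax output;
    # only the element-wise (diagonal) term is returned, as used above
    if derivate:
        return x * (1 - x)
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # shift for numerical stability
    return exp_x / exp_x.sum(axis=1, keepdims=True)

def loss_metrics(predictions, targets, metric, status):
    # Assumed sketch: status="forward" returns the scalar loss for the chosen metric
    if status == "forward":
        if metric == "MSE":
            return np.mean((predictions - targets) ** 2)
        if metric == "BCE":
            eps = 1e-12  # avoid log(0)
            p = np.clip(predictions, eps, 1 - eps)
            return -np.mean(targets * np.log(p) + (1 - targets) * np.log(1 - p))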
# One-hot encoding function:
def one_hot(targets):
@@ -136,7 +134,8 @@ def one_hot(targets):
    return one_hot_matrix
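The body of one_hot is collapsed in this diff; given that it returns one_hot_matrix, a plausible implementation for a 1-D array of integer class labels (an assumption, not the commit's code) is:

def one_hot(targets):
    # Assumed sketch: map each integer label to a row of the identity matrix
    num_classes = int(targets.max()) + 1
    one_hot_matrix = np.eye(num_classes)[targets.astype(int)]
    return one_hot_matrix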
# learn_once_cross_entropy
def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
""" """
Perform one gradient descent step using binary cross-entropy loss. Perform one gradient descent step using binary cross-entropy loss.
...@@ -145,7 +144,6 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate ...@@ -145,7 +144,6 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate
- data: Input data matrix of shape (batch_size x d_in). - data: Input data matrix of shape (batch_size x d_in).
- targets: Target output matrix of shape (batch_size x d_out). - targets: Target output matrix of shape (batch_size x d_out).
- learning_rate: Learning rate for gradient descent. - learning_rate: Learning rate for gradient descent.
- metrics: Specifies the loss metric (default is Binary Cross Entropy).
Returns: Returns:
- Updated weights and biases (W1, b1, W2, b2) of the network. - Updated weights and biases (W1, b1, W2, b2) of the network.
@@ -154,24 +152,24 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate
    # Forward pass
    # Feedforward propagation on the hidden layer
    hidden_layer_input = np.matmul(data, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)  # Apply the Sigmoid activation function

    # Feedforward propagation on the output layer
    output_layer_input = np.matmul(hidden_layer_output, W2) + b2
    output_layer_output = softmax(output_layer_input, derivate=False)  # Apply the Softmax activation function

    # Backpropagation phase
    # Updating W2 and b2
    output_error = output_layer_output - targets
    dW2 = output_error * softmax(output_layer_output, derivate=True)
    W2_update = np.dot(hidden_layer_output.T, dW2) / data.shape[0]
    update_b2 = (1 / hidden_layer_output.shape[1]) * dW2.sum(axis=0, keepdims=True)

    # Updating W1 and b1
    hidden_layer_error = np.dot(dW2, W2.T)
    dW1 = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
    W1_update = np.dot(data.T, dW1) / data.shape[0]
    update_b1 = (1 / data.shape[1]) * dW1.sum(axis=0, keepdims=True)

    # Gradient descent
@@ -181,10 +179,11 @@ def learn_once_binary_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate
    b1 = b1 - learning_rate * update_b1

    # Compute loss (Binary Cross Entropy)
    loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")

    return W1, b1, W2, b2, loss
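Neither hunk shows how these single-step functions are driven. Assuming integer labels passed through one_hot and the hyperparameters defined at the top, a training loop would look roughly like the following; the label generation and variable names here are hypothetical:

# Hypothetical usage sketch (not part of the commit)
labels = np.random.randint(0, d_out, size=N)   # fake integer class labels
targets_oh = one_hot(labels)                   # shape (N, d_out)
train_losses = []
for epoch in range(num_epochs):
    W1, b1, W2, b2, loss = learn_once_cross_entropy(W1, b1, W2, b2, data, targets_oh, learning_rate)
    train_losses.append(loss)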
def calculate_accuracy(predictions, actual_values):
    """
    calculate_accuracy: Compute the accuracy of the model.
...
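The body of calculate_accuracy is collapsed in this diff. For one-hot (or probability) predictions and targets, accuracy is typically computed by comparing argmax indices; a plausible sketch, not the commit's actual code:

def calculate_accuracy(predictions, actual_values):
    # Assumed sketch: both arguments are (batch_size x d_out) arrays;
    # a prediction counts as correct when its argmax matches the target's argmax
    predicted_classes = np.argmax(predictions, axis=1)
    actual_classes = np.argmax(actual_values, axis=1)
    return np.mean(predicted_classes == actual_classes)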