Commit b1f3443c authored by Matías Duhalde

feat: mlp functions

parent 9d681b50
mlp.py 0 → 100644
import numpy as np
def learn_once_mse(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
targets: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
targets (np.ndarray): The targets, of shape (N, d_out).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - targets))
# Backward pass
# Compute gradients
    # dC/da2 follows the 1/N (per-sample) convention; a constant factor in the
    # gradient only rescales the effective learning rate
    dC_da2 = 2 * (predictions - targets) / predictions.shape[0]
    dC_dz2 = dC_da2 * a2 * (1 - a2)  # chain rule through the sigmoid: a2 * (1 - a2)
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
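# Usage sketch (illustrative, not part of the original file): one MSE gradient
# step on random data. The shapes (N=8, d_in=3, d_h=4, d_out=2) are arbitrary,
# and note that the weight arrays are also updated in place by `-=`.
#
#   rng = np.random.default_rng(0)
#   w1, b1 = rng.uniform(-1, 1, (3, 4)), np.zeros((1, 4))
#   w2, b2 = rng.uniform(-1, 1, (4, 2)), np.zeros((1, 2))
#   x, y = rng.normal(size=(8, 3)), rng.uniform(size=(8, 2))
#   w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, x, y, 0.1)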
def one_hot(labels: np.ndarray) -> np.ndarray:
"""Calculates the one-hot matrix of the given labels.
    Args:
        labels (np.ndarray): The integer class labels, of shape (N,).
    Returns:
        np.ndarray: The one-hot matrix of the labels, of shape (N, num_classes).
    """
    return np.eye(labels.max() + 1)[labels]
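# Illustrative check (not in the original file): one_hot indexes rows of the
# identity matrix, so
#   one_hot(np.array([0, 2, 1]))
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]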
def learn_once_cross_entropy(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent using a binary cross-entropy loss on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
one_hot_targets = one_hot(labels_train)
# Compute loss (Cross Entropy)
# https://arize.com/blog-course/binary-cross-entropy-log-loss/
loss = -np.mean(
one_hot_targets * np.log(predictions)
+ (1 - one_hot_targets) * np.log(1 - predictions)
)
# Backward pass
# Compute gradients
    # For a sigmoid output with cross-entropy, the gradient w.r.t. z2
    # simplifies to (a2 - y); the omitted 1/N factor is absorbed by the
    # learning rate
    dC_dz2 = a2 - one_hot_targets
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
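# Usage sketch (illustrative, not part of the original file): one cross-entropy
# step. Unlike learn_once_mse, this function takes integer labels of shape (N,)
# and one-hot encodes them internally; the shapes below are arbitrary.
#
#   rng = np.random.default_rng(0)
#   w1, b1 = rng.uniform(-1, 1, (3, 4)), np.zeros((1, 4))
#   w2, b2 = rng.uniform(-1, 1, (4, 2)), np.zeros((1, 2))
#   x = rng.normal(size=(8, 3))
#   labels = np.array([0, 1, 0, 1, 1, 0, 1, 0])
#   w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, x, labels, 0.1)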
def train_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_train: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
num_epoch: int,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]]:
"""Perform num_epoch training steps.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data_train (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
learning_rate (float): The learning rate.
num_epoch (int): The number of epochs.
Returns:
        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]): A tuple containing the resulting weights and biases, and the training accuracy at each epoch.
"""
# Starting accuracy (random weights)
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies = [accuracy]
for _ in range(num_epoch):
        # Train once. labels_train holds integer class labels, so use the
        # cross-entropy step, which one-hot encodes them internally
        # (learn_once_mse would require one-hot targets of shape (N, d_out)).
        w1, b1, w2, b2, _ = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
# Compute current model training accuracy
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies.append(accuracy)
return w1, b1, w2, b2, accuracies
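# Note (illustrative, not part of the original file): `accuracies` has
# num_epoch + 1 entries, because the accuracy of the randomly initialised
# network is recorded before the first training step:
#   w1, b1, w2, b2, accs = train_mlp(w1, b1, w2, b2, x, labels, 0.1, 10)
#   assert len(accs) == 11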
def test_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
) -> float:
"""Test the network on the given test set.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels, of shape (N,).
Returns:
float: The testing accuracy of the model on the given data.
"""
# Forward pass
a0 = data_test # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute accuracy
accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test)
return accuracy
def run_mlp_training(
data_train: np.ndarray,
labels_train: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
d_h: int,
learning_rate: float,
num_epoch: int,
) -> tuple[list[float], float]:
"""Train an MLP classifier.
Args:
data_train (np.ndarray): The training data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels, of shape (N,).
        d_h (int): The number of neurons in the hidden layer.
        learning_rate (float): The learning rate.
num_epoch (int): The number of training epochs.
Returns:
        (list[float], float): A tuple containing the training accuracy after each epoch, and the final testing accuracy.
"""
d_in = data_train.shape[1]
    d_out = labels_train.max() + 1  # number of classes
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights in [-1, 1)
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights in [-1, 1)
    b2 = np.zeros((1, d_out))  # second layer biases
# Train the network
w1, b1, w2, b2, accuracy_values = train_mlp(
w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
)
# Test the network
accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return accuracy_values, accuracy
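if __name__ == "__main__":
    # Smoke test (illustrative, not part of the original commit): train on a
    # synthetic two-class problem where class-1 points are shifted Gaussians,
    # so even this small MLP should separate them easily. The hyperparameters
    # are arbitrary, and the unnormalised gradients may trigger harmless
    # overflow warnings in np.exp / np.log as the sigmoids saturate.
    rng = np.random.default_rng(0)
    n, d_in = 200, 10
    labels = rng.integers(0, 2, size=n)
    data = rng.normal(size=(n, d_in)) + 2.0 * labels[:, None]
    train_accs, test_acc = run_mlp_training(
        data[:150], labels[:150], data[150:], labels[150:],
        d_h=16, learning_rate=0.1, num_epoch=50,
    )
    print(f"final train accuracy: {train_accs[-1]:.3f}")
    print(f"test accuracy: {test_acc:.3f}")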