From b1f3443c0e1e02ee1532727fd0eaee96e8dea06e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mat=C3=ADas=20Duhalde?= <matias.duhalde@uc.cl>
Date: Tue, 31 Oct 2023 08:37:25 +0100
Subject: [PATCH] feat: mlp functions

---
 mlp.py | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100644 mlp.py

diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000..9e45e9c
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,264 @@
+import numpy as np
+
+
+def learn_once_mse(
+    w1: np.ndarray,
+    b1: np.ndarray,
+    w2: np.ndarray,
+    b2: np.ndarray,
+    data: np.ndarray,
+    targets: np.ndarray,
+    learning_rate: float,
+) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray, float):
+    """Perform one step of gradient descent on the given data and targets.
+
+    Args:
+        w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
+        b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
+        w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
+        b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
+        data (np.ndarray): The data, of shape (N, d_in).
+        targets (np.ndarray): The targets, of shape (N, d_out).
+        learning_rate (float): The learning rate.
+
+    Returns:
+        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
+    """
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+
+    # Backward pass
+
+    # Compute gradients
+    dC_da2 = 2 * (predictions - targets) / predictions.shape[0]
+    dC_dz2 = dC_da2 * a2 * (1 - a2)
+    dC_dw2 = np.matmul(a1.T, dC_dz2)
+    dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
+    dC_da1 = np.matmul(dC_dz2, w2.T)
+    dC_dz1 = dC_da1 * a1 * (1 - a1)
+    dC_dw1 = np.matmul(a0.T, dC_dz1)
+    dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dw1
+    b1 -= learning_rate * dC_db1
+    w2 -= learning_rate * dC_dw2
+    b2 -= learning_rate * dC_db2
+
+    return w1, b1, w2, b2, loss
+
+
+def one_hot(labels: np.ndarray) -> np.ndarray:
+    """Calculates the one-hot matrix of the given labels.
+
+    Args:
+        labels (np.ndarray): The labels, as integer class indices.
+
+    Returns:
+        np.ndarray: The one-hot matrix of the labels.
+    """
+    return np.eye(labels.max() + 1)[labels]
+
+
+def learn_once_cross_entropy(
+    w1: np.ndarray,
+    b1: np.ndarray,
+    w2: np.ndarray,
+    b2: np.ndarray,
+    data: np.ndarray,
+    labels_train: np.ndarray,
+    learning_rate: float,
+) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray, float):
+    """Perform one step of gradient descent using a binary cross-entropy loss on the given data and labels.
+
+    Args:
+        w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
+        b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
+        w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
+        b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
+        data (np.ndarray): The data, of shape (N, d_in).
+        labels_train (np.ndarray): The training labels, as integer class indices, of shape (N,).
+        learning_rate (float): The learning rate.
+
+    Returns:
+        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
+    """
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (
+        1 + np.exp(-z1)
+    )  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (
+        1 + np.exp(-z2)
+    )  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    one_hot_targets = one_hot(labels_train)
+
+    # Compute loss (Cross Entropy)
+    # https://arize.com/blog-course/binary-cross-entropy-log-loss/
+    loss = -np.mean(
+        one_hot_targets * np.log(predictions)
+        + (1 - one_hot_targets) * np.log(1 - predictions)
+    )
+
+    # Backward pass
+
+    # Compute gradients
+
+    dC_dz2 = a2 - one_hot_targets
+    dC_dw2 = np.matmul(a1.T, dC_dz2)
+    dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
+    dC_da1 = np.matmul(dC_dz2, w2.T)
+    dC_dz1 = dC_da1 * a1 * (1 - a1)
+    dC_dw1 = np.matmul(a0.T, dC_dz1)
+    dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w1 -= learning_rate * dC_dw1
+    b1 -= learning_rate * dC_db1
+    w2 -= learning_rate * dC_dw2
+    b2 -= learning_rate * dC_db2
+
+    return w1, b1, w2, b2, loss
+
+
+def train_mlp(
+    w1: np.ndarray,
+    b1: np.ndarray,
+    w2: np.ndarray,
+    b2: np.ndarray,
+    data_train: np.ndarray,
+    labels_train: np.ndarray,
+    learning_rate: float,
+    num_epoch: int,
+) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]):
+    """Perform num_epoch training steps.
+
+    Args:
+        w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
+        b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
+        w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
+        b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
+        data_train (np.ndarray): The training data, of shape (N, d_in).
+        labels_train (np.ndarray): The training labels, as integer class indices, of shape (N,).
+        learning_rate (float): The learning rate.
+        num_epoch (int): The number of epochs.
+
+    Returns:
+        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]): A tuple containing the resulting weights and biases, and the list of training accuracy values of each epoch.
+    """
+
+    # Starting accuracy (random weights)
+    accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
+    accuracies = [accuracy]
+
+    for _ in range(num_epoch):
+        # Train once (labels_train holds integer class indices, so use the cross-entropy step)
+        w1, b1, w2, b2, _ = learn_once_cross_entropy(
+            w1, b1, w2, b2, data_train, labels_train, learning_rate
+        )
+
+        # Compute current model training accuracy
+        accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
+        accuracies.append(accuracy)
+
+    return w1, b1, w2, b2, accuracies
+
+
+def test_mlp(
+    w1: np.ndarray,
+    b1: np.ndarray,
+    w2: np.ndarray,
+    b2: np.ndarray,
+    data_test: np.ndarray,
+    labels_test: np.ndarray,
+) -> float:
+    """Test the network on the given test set.
+
+    Args:
+        w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
+        b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
+        w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
+        b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
+        data_test (np.ndarray): The test data, of shape (N, d_in).
+        labels_test (np.ndarray): The test labels, as integer class indices, of shape (N,).
+
+    Returns:
+        float: The testing accuracy of the model on the given data.
+ """ + # Forward pass + a0 = data_test # the data are the input of the first layer + z1 = np.matmul(a0, w1) + b1 # input of the hidden layer + a1 = 1 / ( + 1 + np.exp(-z1) + ) # output of the hidden layer (sigmoid activation function) + z2 = np.matmul(a1, w2) + b2 # input of the output layer + a2 = 1 / ( + 1 + np.exp(-z2) + ) # output of the output layer (sigmoid activation function) + predictions = a2 # the predicted values are the outputs of the output layer + + # Compute accuracy + accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test) + + return accuracy + + +def run_mlp_training( + data_train: np.ndarray, + labels_train: np.ndarray, + data_test: np.ndarray, + labels_test: np.ndarray, + d_h: int, + learning_rate: float, + num_epoch: int, +) -> (list[float], float): + """Train an MLP classifier. + + Args: + data_train (np.ndarray): The training data, of shape (N, d_in). + labels_train (np.ndarray): The training labels, of shape (N, d_out). + data_test (np.ndarray): The test data, of shape (N, d_in). + labels_test (np.ndarray): The test labels, of shape (N, d_out). + learning_rate (float): The learning rate. + num_epoch (int): The number of training epochs. + + Returns: + (list[float], float): A tuple containing the list of training accuracy values of each epoch, and the final accuracy. + """ + + d_in = data_train.shape[1] + d_out = labels_train.shape[0] + + # Random initialization of the network weights and biaises + w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights + b1 = np.zeros((1, d_h)) # first layer biaises + w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights + b2 = np.zeros((1, d_out)) # second layer biaises + + # Train the network + w1, b1, w2, b2, accuracy_values = train_mlp( + w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch + ) + + # Test the network + accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test) + + return accuracy_values, accuracy -- GitLab