Commit b1f3443c authored by Matías Duhalde

feat: mlp functions

parent 9d681b50
mlp.py 0 → 100644
import numpy as np
def learn_once_mse(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
targets: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
targets (np.ndarray): The targets, of shape (N, d_out).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute loss (MSE)
loss = np.mean(np.square(predictions - targets))
# Backward pass
# Compute gradients
    # dC/da2 follows the 1/N (per-sample) convention; a constant factor in the
    # gradient only rescales the effective learning rate
    dC_da2 = 2 * (predictions - targets) / predictions.shape[0]
    dC_dz2 = dC_da2 * a2 * (1 - a2)  # chain rule through the sigmoid: a2 * (1 - a2)
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
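# Usage sketch (illustrative, not part of the original file): one MSE gradient
# step on random data. The shapes (N=8, d_in=3, d_h=4, d_out=2) are arbitrary,
# and note that the weight arrays are also updated in place by `-=`.
#
#   rng = np.random.default_rng(0)
#   w1, b1 = rng.uniform(-1, 1, (3, 4)), np.zeros((1, 4))
#   w2, b2 = rng.uniform(-1, 1, (4, 2)), np.zeros((1, 2))
#   x, y = rng.normal(size=(8, 3)), rng.uniform(size=(8, 2))
#   w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, x, y, 0.1)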
def one_hot(labels: np.ndarray) -> np.ndarray:
"""Calculates the one-hot matrix of the given labels.
    Args:
        labels (np.ndarray): The integer class labels, of shape (N,).
    Returns:
        np.ndarray: The one-hot matrix of the labels, of shape (N, num_classes).
    """
    return np.eye(labels.max() + 1)[labels]
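# Illustrative check (not in the original file): one_hot indexes rows of the
# identity matrix, so
#   one_hot(np.array([0, 2, 1]))
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]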
def learn_once_cross_entropy(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, float]:
"""Perform one step of gradient descent using a binary cross-entropy loss on the given data and targets.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
learning_rate (float): The learning rate.
Returns:
(np.ndarray, np.ndarray, np.ndarray, np.ndarray, float): A tuple containing the updated weights and biases, and the loss.
"""
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
one_hot_targets = one_hot(labels_train)
# Compute loss (Cross Entropy)
# https://arize.com/blog-course/binary-cross-entropy-log-loss/
loss = -np.mean(
one_hot_targets * np.log(predictions)
+ (1 - one_hot_targets) * np.log(1 - predictions)
)
# Backward pass
# Compute gradients
    # For a sigmoid output with cross-entropy, the gradient w.r.t. z2
    # simplifies to (a2 - y); the omitted 1/N factor is absorbed by the
    # learning rate
    dC_dz2 = a2 - one_hot_targets
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
# Update weights and biases
w1 -= learning_rate * dC_dw1
b1 -= learning_rate * dC_db1
w2 -= learning_rate * dC_dw2
b2 -= learning_rate * dC_db2
return w1, b1, w2, b2, loss
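# Usage sketch (illustrative, not part of the original file): one cross-entropy
# step. Unlike learn_once_mse, this function takes integer labels of shape (N,)
# and one-hot encodes them internally; the shapes below are arbitrary.
#
#   rng = np.random.default_rng(0)
#   w1, b1 = rng.uniform(-1, 1, (3, 4)), np.zeros((1, 4))
#   w2, b2 = rng.uniform(-1, 1, (4, 2)), np.zeros((1, 2))
#   x = rng.normal(size=(8, 3))
#   labels = np.array([0, 1, 0, 1, 1, 0, 1, 0])
#   w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, x, labels, 0.1)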
def train_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_train: np.ndarray,
labels_train: np.ndarray,
learning_rate: float,
num_epoch: int,
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]]:
"""Perform num_epoch training steps.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
data_train (np.ndarray): The data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
learning_rate (float): The learning rate.
num_epoch (int): The number of epochs.
Returns:
        (np.ndarray, np.ndarray, np.ndarray, np.ndarray, list[float]): A tuple containing the resulting weights and biases, and the training accuracy at each epoch.
"""
# Starting accuracy (random weights)
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies = [accuracy]
for _ in range(num_epoch):
        # Train once. labels_train holds integer class labels, so use the
        # cross-entropy step, which one-hot encodes them internally
        # (learn_once_mse would require one-hot targets of shape (N, d_out)).
        w1, b1, w2, b2, _ = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate
        )
# Compute current model training accuracy
accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
accuracies.append(accuracy)
return w1, b1, w2, b2, accuracies
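# Note (illustrative, not part of the original file): `accuracies` has
# num_epoch + 1 entries, because the accuracy of the randomly initialised
# network is recorded before the first training step:
#   w1, b1, w2, b2, accs = train_mlp(w1, b1, w2, b2, x, labels, 0.1, 10)
#   assert len(accs) == 11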
def test_mlp(
w1: np.ndarray,
b1: np.ndarray,
w2: np.ndarray,
b2: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
) -> float:
"""Test the network on the given test set.
Args:
w1 (np.ndarray): The weights of the first layer, of shape (d_in, d_h).
b1 (np.ndarray): The bias of the first layer, of shape (1, d_h).
w2 (np.ndarray): The weights of the second layer, of shape (d_h, d_out).
b2 (np.ndarray): The bias of the second layer, of shape (1, d_out).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels, of shape (N,).
Returns:
float: The testing accuracy of the model on the given data.
"""
# Forward pass
a0 = data_test # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (
1 + np.exp(-z1)
) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute accuracy
accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test)
return accuracy
def run_mlp_training(
data_train: np.ndarray,
labels_train: np.ndarray,
data_test: np.ndarray,
labels_test: np.ndarray,
d_h: int,
learning_rate: float,
num_epoch: int,
) -> tuple[list[float], float]:
"""Train an MLP classifier.
Args:
data_train (np.ndarray): The training data, of shape (N, d_in).
        labels_train (np.ndarray): The training labels, of shape (N,).
        data_test (np.ndarray): The test data, of shape (N, d_in).
        labels_test (np.ndarray): The test labels, of shape (N,).
        d_h (int): The number of neurons in the hidden layer.
        learning_rate (float): The learning rate.
num_epoch (int): The number of training epochs.
Returns:
        (list[float], float): A tuple containing the training accuracy after each epoch, and the final testing accuracy.
"""
d_in = data_train.shape[1]
    d_out = labels_train.max() + 1  # number of classes
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights in [-1, 1)
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights in [-1, 1)
    b2 = np.zeros((1, d_out))  # second layer biases
# Train the network
w1, b1, w2, b2, accuracy_values = train_mlp(
w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
)
# Test the network
accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return accuracy_values, accuracy
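if __name__ == "__main__":
    # Smoke test (illustrative, not part of the original commit): train on a
    # synthetic two-class problem where class-1 points are shifted Gaussians,
    # so even this small MLP should separate them easily. The hyperparameters
    # are arbitrary, and the unnormalised gradients may trigger harmless
    # overflow warnings in np.exp / np.log as the sigmoids saturate.
    rng = np.random.default_rng(0)
    n, d_in = 200, 10
    labels = rng.integers(0, 2, size=n)
    data = rng.normal(size=(n, d_in)) + 2.0 * labels[:, None]
    train_accs, test_acc = run_mlp_training(
        data[:150], labels[:150], data[150:], labels[150:],
        d_h=16, learning_rate=0.1, num_epoch=50,
    )
    print(f"final train accuracy: {train_accs[-1]:.3f}")
    print(f"test accuracy: {test_acc:.3f}")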