Unverified Commit 79448985 authored by Jangberry (Nomad-Debian)

Finally done

parent a95e44f9
import numpy as np
import read_cifar
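# read_cifar is the project's local module; it provides read_cifar() and
# split_dataset(), which are used in the __main__ block below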

def learn_once_mse(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, target: np.array, learning_rate: float):
    """
@@ -76,7 +78,7 @@ def one_hot(labels: np.array):
    return one_hot_matrix

def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.array, data: np.array, labels_train: np.array, learning_rate: float):
    """
    Performs one learning step of the MLP with cross-entropy loss
@@ -96,24 +98,42 @@ def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.ar
    loss -- loss of the forward pass
    """
    # Forward pass
    # the data are the input of the first layer
    a0 = data
    # input of the hidden layer
    z1 = np.matmul(a0, w1) + b1
    # output of the hidden layer (sigmoid activation function)
    a1 = 1 / (1 + np.exp(-z1))
    # input of the output layer
    z2 = np.matmul(a1, w2) + b2
    # output of the output layer (softmax activation function)
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
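    # (softmax turns each row of logits into a probability distribution:
    # entries are positive and each row of a2 sums to 1)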
    # the predicted values are the outputs of the output layer
    predictions = a2

    one_hot_targets = one_hot(labels_train)

    # Compute loss (categorical cross-entropy, consistent with the softmax
    # output and with the gradient used in the backward pass below)
    loss = -np.mean(np.sum(one_hot_targets * np.log(predictions), axis=1))
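    # e.g. for one sample with one-hot target [0, 1, 0] and predictions
    # [0.2, 0.7, 0.1], the cross-entropy is -log(0.7) ≈ 0.357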
    # Backward pass
    # derivative of the loss with respect to the input of the output layer
    dC_dZ2 = predictions - one_hot_targets
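    # (for a softmax output trained with cross-entropy, the chain rule
    # collapses to this simple difference; no explicit softmax Jacobian
    # is needed)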
    # derivative of the loss with respect to the weights of the output layer
    dC_dW2 = np.matmul(a1.T, dC_dZ2)
    # derivative of the loss with respect to the biases of the output layer
    dC_dB2 = np.sum(dC_dZ2, axis=0, keepdims=True)
    # derivative of the loss with respect to the output of the hidden layer
    dC_dA1 = np.matmul(dC_dZ2, w2.T)
    # derivative of the loss with respect to the input of the hidden layer
    dC_dZ1 = dC_dA1 * (1 - a1) * a1
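    # (uses the sigmoid derivative: d(a1)/d(z1) = a1 * (1 - a1))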
    # derivative of the loss with respect to the weights of the hidden layer
    dC_dW1 = np.matmul(a0.T, dC_dZ1)
    # derivative of the loss with respect to the biases of the hidden layer
    dC_dB1 = np.sum(dC_dZ1, axis=0, keepdims=True)
    # Update weights and biases
    w1 -= learning_rate * dC_dW1
@@ -124,6 +144,103 @@ def learn_once_cross_entropy(w1: np.array, b1: np.array, w2: np.array, b2: np.ar
    return w1, b1, w2, b2, loss

def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
    """
    Trains the MLP
    Arguments:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    data_train -- training data
    labels_train -- labels of the training data
    learning_rate -- learning rate
    num_epochs -- number of epochs
    Returns:
    w1 -- updated weights of the hidden layer
    b1 -- updated biases of the hidden layer
    w2 -- updated weights of the output layer
    b2 -- updated biases of the output layer
    acc -- list of training accuracies across epochs
    """
    acc = []
    for i in range(num_epochs):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data_train, labels_train, learning_rate)
        acc.append(test_mlp(w1, b1, w2, b2, data_train, labels_train))
    return w1, b1, w2, b2, acc

def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    """
    Tests the MLP
    Arguments:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    data_test -- test data
    labels_test -- labels of the test data
    Returns:
    acc -- accuracy
    """
    # Forward pass
    # the data are the input of the first layer
    a0 = data_test
    # input of the hidden layer
    z1 = np.matmul(a0, w1) + b1
    # output of the hidden layer (sigmoid activation function)
    a1 = 1 / (1 + np.exp(-z1))
    # input of the output layer
    z2 = np.matmul(a1, w2) + b2
    # output of the output layer (softmax activation function)
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
    # the predicted values are the outputs of the output layer
    predictions = a2
    # Compute accuracy
    acc = np.mean(np.argmax(predictions, axis=1) == labels_test)
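    # e.g. predictions [[0.1, 0.9], [0.8, 0.2]] with labels [1, 0] give
    # acc = 1.0, since both argmax predictions match their labels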
    return acc

def run_mlp_training(data_train, labels_train, data_test, labels_test, learning_rate, num_epochs):
    """
    Runs the MLP training
    Arguments:
    data_train -- training data
    labels_train -- labels of the training data
    data_test -- test data
    labels_test -- labels of the test data
    learning_rate -- learning rate
    num_epochs -- number of epochs
    Returns:
    w1 -- weights of the hidden layer
    b1 -- biases of the hidden layer
    w2 -- weights of the output layer
    b2 -- biases of the output layer
    acc -- list of training accuracies across epochs
    """
    N = data_train.shape[0]  # number of training samples
    d_in = data_train.shape[1]  # input dimension
    d_h = 3  # number of neurons in the hidden layer
    # output dimension (number of neurons of the output layer)
    d_out = np.max(labels_train) + 1
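    # (for CIFAR-10, labels run from 0 to 9, so d_out = 10)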
    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
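    # (2 * rand - 1 rescales the uniform draw from [0, 1) onto [-1, 1);
    # biases start at zero, a common initialization choice)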
    w1, b1, w2, b2, acc = train_mlp(
        w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
    return w1, b1, w2, b2, acc
if __name__ == "__main__": if __name__ == "__main__":
N = 30 # number of input data N = 30 # number of input data
d_in = 3 # input dimension d_in = 3 # input dimension
...@@ -145,3 +262,28 @@ if __name__ == "__main__": ...@@ -145,3 +262,28 @@ if __name__ == "__main__":
print(loss) print(loss)
print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4]))) print(one_hot(np.array([9, 1, 3, 0, 6, 5, 2, 7, 8, 4])))
    N = 30  # number of input samples
    d_in = 3  # input dimension
    d_h = 3  # number of neurons in the hidden layer
    d_out = 5  # output dimension (number of neurons of the output layer)
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases
    data = np.random.rand(N, d_in)  # create random input data
    targets = np.random.randint(1, d_out, N)  # create random targets in [1, d_out)
    for i in range(100):
        w1, b1, w2, b2, loss = learn_once_cross_entropy(
            w1, b1, w2, b2, data, targets, 0.1)
        print(loss)
    data, labels = read_cifar.read_cifar("data/cifar-10-batches-py/")
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(
        data, labels, 0.8)
    w1, b1, w2, b2, acc = run_mlp_training(
        data_train, labels_train, data_test, labels_test, 0.1, 100)
    print(acc)