Skip to content
Snippets Groups Projects
Commit 5f854f60 authored by Matías Duhalde's avatar Matías Duhalde
Browse files

feat: use softmax

parent fffcc01f
No related branches found
No related tags found
No related merge requests found
...@@ -98,16 +98,12 @@ def learn_once_cross_entropy( ...@@ -98,16 +98,12 @@ def learn_once_cross_entropy(
""" """
# Forward pass # Forward pass
a0 = data # the data are the input of the first layer a0 = data
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer z1 = np.matmul(a0, w1) + b1
a1 = 1 / ( a1 = 1 / (1 + np.exp(-z1))
1 + np.exp(-z1) z2 = np.matmul(a1, w2) + b2
) # output of the hidden layer (sigmoid activation function) a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
z2 = np.matmul(a1, w2) + b2 # input of the output layer predictions = a2
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
one_hot_targets = one_hot(labels_train) one_hot_targets = one_hot(labels_train)
...@@ -171,7 +167,7 @@ def train_mlp( ...@@ -171,7 +167,7 @@ def train_mlp(
for _ in range(num_epoch): for _ in range(num_epoch):
# Train once # Train once
w1, b1, w2, b2, _ = learn_once_mse( w1, b1, w2, b2, _ = learn_once_cross_entropy(
w1, b1, w2, b2, data_train, labels_train, learning_rate w1, b1, w2, b2, data_train, labels_train, learning_rate
) )
...@@ -204,16 +200,12 @@ def test_mlp( ...@@ -204,16 +200,12 @@ def test_mlp(
float: The testing accuracy of the model on the given data. float: The testing accuracy of the model on the given data.
""" """
# Forward pass # Forward pass
a0 = data_test # the data are the input of the first layer a0 = data_test
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer z1 = np.matmul(a0, w1) + b1
a1 = 1 / ( a1 = 1 / (1 + np.exp(-z1))
1 + np.exp(-z1) z2 = np.matmul(a1, w2) + b2
) # output of the hidden layer (sigmoid activation function) a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
z2 = np.matmul(a1, w2) + b2 # input of the output layer predictions = a2
a2 = 1 / (
1 + np.exp(-z2)
) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# Compute accuracy # Compute accuracy
accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test) accuracy = np.mean(np.argmax(predictions, axis=1) == labels_test)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment