com

cd1f2e19 · Cart Milan · f6c99d4c · cd1f2e19
Commit cd1f2e19 authored 1 year ago by Cart Milan
--- a/mlp.py
+++ b/mlp.py
@@ -8,31 +8,21 @@ def sigmoid(x):

 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):

-    N_out = len(data) #number of training examples
-    # Forward pass
-    a0 = data # the data are the input of the first layer
-    z1 = np.dot(a0, w1) + b1  # input of the hidden layer
-    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
-    z2 = np.dot(a1, w2) + b2  # input of the output layer
-    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
-    predictions = a2  # the predicted values are the outputs of the output layer
+    N_out = len(data) 
+   
+    a0 = data 
+    z1 = np.dot(a0, w1) + b1  
+    a1 = sigmoid(z1) 
+    z2 = np.dot(a1, w2) + b2  
+    a2 = sigmoid(z2) 
+    predictions = a2  

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    print(f'loss: {loss}')
-    # print('shape a1', a1.shape)
-    # print('shape w1', w1.shape)
-    # print('shape b1', b1.shape)
-
-    # print('shape a2', a2.shape)
-    # print('shape w2', w2.shape)
-    # print('shape b2', b2.shape)
    
-    # Backpropagation
-    
-    # Backpropagation
    delta_a2 = 2 / N_out * (a2 - targets)
-    delta_z2 = delta_a2 * (a2 * (1 - a2))  # We divide by the sample size to have an average on the error and avoid big gradient jumps
+    delta_z2 = delta_a2 * (a2 * (1 - a2))  
    delta_w2 = np.dot(a1.T, delta_z2) 
    delta_b2 = np.sum(delta_z2, axis = 0, keepdims = True) 

@@ -44,12 +34,12 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    return w1, b1, w2, b2, loss

 def one_hot(labels):
-    num_classes = int(np.max(labels) + 1) #num_classes = 10
+    num_classes = int(np.max(labels) + 1) 
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix

 def softmax_stable(x):
-    #We use this function to avoid computing big numbers
+   
    return(np.exp(x - np.max(x, axis=1, keepdims=True)) / np.exp(x - np.max(x, axis=1, keepdims=True)).sum())

 def cross_entropy_loss(y_pred, y_true_one_hot):