diff --git a/mlp.py b/mlp.py
index b8e5eb0fddf9ddecec9eb5860b15f38a4fd5aff8..1b7402fd2ff1ba2db2a8c3806ce81a23def04dfd 100644
--- a/mlp.py
+++ b/mlp.py
@@ -4,7 +4,8 @@ def sigmoid(x):
     return 1 / (1 + np.exp(-x))
 
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-    N = len(targets)
+    N = len(targets)  # number of training examples
+
     # Forward pass
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
@@ -26,10 +27,41 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     d_w1 = np.matmul(a0.T, d_z1)
     d_b1 = d_z1
 
-    # Calculation of the updated weights and biases of the network
+    # Calculation of the updated weights and biases of the network with the gradient descent method
+    w1 -= learning_rate * d_w1
+    w2 -= learning_rate * d_w2
+    b2 -= learning_rate * d_b2
+    b1 -= learning_rate * d_b1
+    return w1, b1, w2, b2, loss
+
+
+def one_hot(labels):
+    # Total number of classes
+    num_classes = np.max(labels) + 1
+    # Build the one-hot matrix by indexing the identity matrix with the labels
+    one_hot_matrix = np.eye(num_classes)[labels]
+    return one_hot_matrix
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    N = len(labels_train)  # number of training examples
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    targets_one_hot = one_hot(labels_train)  # targets as a one-hot encoding of the desired labels
+
+    # Cross-entropy loss averaged over the N training examples
+    loss = -np.sum(targets_one_hot * np.log(predictions)) / N
 
     return w1, b1, w2, b2, loss
+
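
Reviewer note (not part of the patch): a minimal, self-contained sketch of how the new one_hot helper and the batch-averaged cross-entropy loss behave on toy data. The shapes, random initialisation, and variable values below are assumptions chosen for illustration, not taken from the repository.

import numpy as np

# Toy setup (assumed shapes): 5 examples, 4 input features, 3 hidden units, 2 classes.
rng = np.random.default_rng(0)
data = rng.normal(size=(5, 4))
labels_train = np.array([0, 1, 1, 0, 1])

# Same construction as one_hot(): row i of the identity matrix selected by label i.
targets_one_hot = np.eye(np.max(labels_train) + 1)[labels_train]  # shape (5, 2)

# Forward pass with randomly initialised parameters, mirroring learn_once_cross_entropy.
w1, b1 = rng.normal(size=(4, 3)), np.zeros((1, 3))
w2, b2 = rng.normal(size=(3, 2)), np.zeros((1, 2))
a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))        # hidden layer output
predictions = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))  # output layer, shape (5, 2)

# Batch-averaged cross-entropy, matching the formula used in the patch.
loss = -np.sum(targets_one_hot * np.log(predictions)) / len(labels_train)
print(float(loss))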