diff --git a/mlp.py b/mlp.py
index b8e5eb0fddf9ddecec9eb5860b15f38a4fd5aff8..1b7402fd2ff1ba2db2a8c3806ce81a23def04dfd 100644
--- a/mlp.py
+++ b/mlp.py
@@ -4,7 +4,8 @@ def sigmoid(x):
     return 1 / (1 + np.exp(-x))
 
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-    N = len(targets)
+    N = len(targets)  # number of training examples
+
     # Forward pass
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
@@ -26,10 +27,41 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     d_w1 = np.matmul(a0.T, d_z1)
     d_b1 = d_z1
 
-    # Calculation of the updated weights and biases of the network
+    # Calculation of the updated weights and biases of the network with the gradient descent method
+    w1 -= learning_rate * d_w1
+    w2 -= learning_rate * d_w2
+    b2 -= learning_rate * d_b2
+    b1 -= learning_rate * d_b1
+    return w1, b1, w2, b2, loss
+
+
+def one_hot(labels):
+    # Total number of classes
+    num_classes = np.max(labels) + 1
+    # Build the one-hot matrix by indexing the identity matrix with the labels
+    one_hot_matrix = np.eye(num_classes)[labels]
+    return one_hot_matrix
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    N = len(labels_train)  # number of training examples
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    targets_one_hot = one_hot(labels_train)  # targets as a one-hot encoding of the desired labels
+
+    # Cross-entropy loss averaged over the N training examples
+    loss = -np.sum(targets_one_hot * np.log(predictions)) / N
 
     return w1, b1, w2, b2, loss
+
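
Reviewer note (not part of the patch): a minimal, self-contained sketch of how the new one_hot helper and the batch-averaged cross-entropy loss behave on toy data. The shapes, random initialisation, and variable values below are assumptions chosen for illustration, not taken from the repository.

import numpy as np

# Toy setup (assumed shapes): 5 examples, 4 input features, 3 hidden units, 2 classes.
rng = np.random.default_rng(0)
data = rng.normal(size=(5, 4))
labels_train = np.array([0, 1, 1, 0, 1])

# Same construction as one_hot(): row i of the identity matrix selected by label i.
targets_one_hot = np.eye(np.max(labels_train) + 1)[labels_train]  # shape (5, 2)

# Forward pass with randomly initialised parameters, mirroring learn_once_cross_entropy.
w1, b1 = rng.normal(size=(4, 3)), np.zeros((1, 3))
w2, b2 = rng.normal(size=(3, 2)), np.zeros((1, 2))
a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))        # hidden layer output
predictions = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))  # output layer, shape (5, 2)

# Batch-averaged cross-entropy, matching the formula used in the patch.
loss = -np.sum(targets_one_hot * np.log(predictions)) / len(labels_train)
print(float(loss))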