diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..92a28be21705e6c026265e879d790234c3c39204
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,50 @@
+import numpy as np
+
+def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float):
+    """Perform one gradient-descent step on the MSE loss and return the updated parameters and the loss."""
+
+    # Forward pass
+    N = np.size(data, 0)  # batch size
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function; np.exp works elementwise, math.exp does not accept arrays)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean((predictions - targets) ** 2)
+
+    # Backward pass: propagate the gradient of the loss through both layers
+    da2 = 2 / N * (a2 - targets)  # dL/da2
+    dz2 = da2 * a2 * (1 - a2)  # dL/dz2, using sigmoid'(z) = a * (1 - a)
+    dw2 = np.matmul(a1.T, dz2)  # dL/dw2
+    db2 = np.sum(dz2, axis=0)  # dL/db2, summed over the batch
+    da1 = np.matmul(dz2, w2.T)  # dL/da1
+    dz1 = da1 * a1 * (1 - a1)  # dL/dz1, using sigmoid'(z) = a * (1 - a)
+    dw1 = np.matmul(a0.T, dz1)  # dL/dw1
+    db1 = np.sum(dz1, axis=0)  # dL/db1, summed over the batch
+
+    # Gradient descent step on all parameters
+    w1 -= learning_rate * dw1
+    w2 -= learning_rate * dw2
+    b1 -= learning_rate * db1
+    b2 -= learning_rate * db2
+
+    return w1, b1, w2, b2, loss
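+
+
+# Minimal smoke test (illustrative sketch: the layer sizes, batch size, and
+# learning rate below are arbitrary assumptions, not part of the original API).
+if __name__ == "__main__":
+    rng = np.random.default_rng(0)
+    d_in, d_h, d_out, N = 3, 4, 2, 8  # hypothetical dimensions
+    w1 = 2 * rng.random((d_in, d_h)) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * rng.random((d_h, d_out)) - 1
+    b2 = np.zeros((1, d_out))
+    data = rng.random((N, d_in))
+    targets = rng.random((N, d_out))
+    for _ in range(100):
+        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
+    print(f"MSE after 100 steps: {loss:.6f}")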