diff --git a/mlp.py b/mlp.py
index 5a91c0d638f9782f7250c999bfc3817d56bff3d2..aa626d038348f6c33a0f76efee44b95ca5d0470b 100644
--- a/mlp.py
+++ b/mlp.py
@@ -3,25 +3,29 @@ import pandas as pd
 
 
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-	"""Take the arrays w1,b1,w2,b2 of a 2-layers neural network 
-	,update them with a gradient descent
-	and calculate the average lost the MSE method """
-	
-    d_in , d_h = w1.shape  # extracts the dimensions of the variables to define future np.arrays
-    N , d_out = targets.shape
+
+    """Take the arrays w1,b1,w2,b2 of a 2-layers neural network
+    ,update them with a gradient descent
+    and calculate the average lost the MSE method"""
+    d_in, d_h = w1.shape  # dimensions used to size the gradient arrays below
+    N, d_out = targets.shape
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
     a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
     a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
     predictions = a2  # the predicted values are the outputs of the output layer
-    #Create the gradient for the variables w2,b2,w1,b1
+    # Mean squared error before the update (recomputed after the update below)
+    loss = np.mean(np.square(predictions - targets))
+    # Create the gradients for the variables w2, b2, w1, b1
     dCdw2 = np.zeros((d_h, d_out))
-    dCdb2 = np.zeros((1, d_out))
+    dCdb2 = np.zeros(d_out)
     dCdw1 = np.zeros((d_in, d_h))
-    dCdb1 = np.zeros((1, d_h))
-	
-	#take each data with its respective labels  
+    dCdb1 = np.zeros(d_h)
+
+    # iterate over each sample together with its target vector
     for dataRow, targetsRow in zip(data, targets):
 
         a0 = dataRow  # the data are the input of the first layer
@@ -31,56 +35,42 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
         a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
         predictionsRow = a2  # the predicted values are the outputs of the output layer
 
-        # Calculate the partial derivative of the cost in relaltion to each network output 
-        dCda = 2 * (predictionsRow - targetsRow)
-		# sum the contribution of each data for the w2  updating
-        for l in range( d_h ):
-            for m in range( d_out ):
-                dCdw2[l][m] += ( 
-                    dCda[l] 
-                    * a2[l] 
-                    * (1 - a2[l]) 
-                    * a1[m]
-                  )
-		# sum the contribution of each data for the b2  updating
-        for l in range( d_out ):
-            dCdb2[0][l] += ( 
-                dCda[l] 
-                * a2[l] 
-                * (1 - a2[l])
-             )
-		# sum the contribution of each data for the w1  updating
-        for l in range( d_in ):
-            for m in range( d_h ):
-                for j in range( d_out ):
-                    dCdw1[l][m] += (
+        # Calculate the partial derivative of the cost with respect to each network output
+        dCda = (2 / d_out) * (predictionsRow - targetsRow)
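+        # (The 2 / d_out factor comes from differentiating the per-sample MSE,
+        # which averages the squared error over the d_out outputs.)
+        # Sketch of the chain-rule terms that the loops below accumulate:
+        #   dC/dz2 = dC/da2 * a2 * (1 - a2)
+        #   dC/dw2[r][c] = a1[r] * dC/dz2[c],        dC/db2[c] = dC/dz2[c]
+        #   dC/dz1[c] = (sum_j w2[c][j] * dC/dz2[j]) * a1[c] * (1 - a1[c])
+        #   dC/dw1[r][c] = a0[r] * dC/dz1[c],        dC/db1[c] = dC/dz1[c]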
+        # sum each sample's contribution to the w2 update
+        for r in range(d_h):
+            for c in range(d_out):
+                dCdw2[r][c] += dCda[c] * a2[c] * (1 - a2[c]) * a1[r]
+        # sum each sample's contribution to the b2 update
+        for r in range(d_out):
+            dCdb2[r] += dCda[r] * a2[r] * (1 - a2[r])
+        # sum each sample's contribution to the w1 update
+        for r in range(d_in):
+            for c in range(d_h):
+                for j in range(d_out):
+                    dCdw1[r][c] += (
                         dCda[j]
                         * a2[j]
                         * (1 - a2[j])
-                        * w2[j][l]
-                        * a1[l]
-                        * (1 - a1[l])
-                        * a0[m]
+                        * w2[c][j]
+                        * a1[c]
+                        * (1 - a1[c])
+                        * a0[r]
                     )
-		# sum the contribution of each data for the b1  updating
-        for l in range( d_h ):
-            for j in range( d_out ):
-                dCdb1[0][l] += (
-                    dCda[j] 
-                    * a2[j] 
-                    * (1 - a2[j]) 
-                    * w2[j][l] 
-                    * a1[l] 
-                    * (1 - a1[l])
+        # sum each sample's contribution to the b1 update
+        for r in range(d_h):
+            for j in range(d_out):
+                dCdb1[r] += (
+                    dCda[j] * a2[j] * (1 - a2[j]) * w2[r][j] * a1[r] * (1 - a1[r])
                 )
-                
-	#Average value of each data contribution
+
+    # Average the accumulated gradients over the N samples
     dCdw1 = dCdw1 / N
     dCdb1 = dCdb1 / N
     dCdw2 = dCdw2 / N
     dCdb2 = dCdb2 / N
-    
-	#Arrays update
+
+    # Update the weights and biases
     w1 -= learning_rate * dCdw1
     b1 -= learning_rate * dCdb1
     w2 -= learning_rate * dCdw2
@@ -88,9 +78,9 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
 
     # realizing a new network interaction with new values
     a0 = data  # the data are the input of the first layer
-    new_z1 = np.matmul(a0, new_w1) + new_b1  # input of the hidden layer
-    new_a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
-    z2 = np.matmul(new_a1, new_w2) + new_b2  # input of the output layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
     a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
     predictions = a2  # the predicted values are the outputs of the output layer
 
@@ -98,89 +88,85 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     loss = np.mean(np.square(predictions - targets))
 
     return w1, b1, w2, b2, loss
-    
+
+
 def one_hot(labels):
 
     """Returns the 2d array with binary vectors with the 1's in the respective position of the sort matrix"""
-    oneHotMat = np.zeros((labels.size, labels.size), dtype=int)
-    
+    oneHotMat = np.zeros((labels.size, 10), dtype=int)  # one column per class (10 classes)
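+    # e.g. (illustrative) one_hot(np.array([2, 0])) gives
+    #   [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+    #    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]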
+
     for index, values in enumerate(labels):
-        oneHotMat[index, values] = 1
+        oneHotMat[index][values] = 1
 
     return oneHotMat
 
+
 def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
-	"""Take the arrays w1,b1,w2,b2 of a 2-layers neural network 
-	,update them with a gradient descent
-	and calculate the average lost the cross - entropy method """
+    """Take the arrays w1,b1,w2,b2 of a 2-layers neural network
+    ,update them with a gradient descent
+    and calculate the average lost the cross - entropy method"""
+
+    d_in, d_h = w1.shape  # dimensions used to size the gradient arrays below
+    N = labels_train.shape[0]
+    d_out = b2.shape[0]
 
-    d_in , d_h = w1.shape  # extracts the dimensions of the variables to define future np.arrays
-    N , d_out = targets.shape
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
     a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
     a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
     predictions = a2  # the predicted values are the outputs of the output layer
-	oneHot = one_hot(labels_train)
-	
-	#Create the gradient for the variables w2,b2,w1,b1
+    oneHot = one_hot(labels_train)
+    # Binary cross-entropy before the update (recomputed after the update below)
+    loss = np.mean(
+        (-oneHot * np.log(predictions)) - (1 - oneHot) * np.log(1 - predictions)
+    )
+    # Create the gradients for the variables w2, b2, w1, b1
     dCdw2 = np.zeros((d_h, d_out))
-    dCdb2 = np.zeros((1, d_out))
+    dCdb2 = np.zeros(d_out)
     dCdw1 = np.zeros((d_in, d_h))
-    dCdb1 = np.zeros((1, d_h))
-    
-	#take each data with its respective labels  
-    for dataRow, oneHotLabel in zip(data, oneHot):
+    dCdb1 = np.zeros(d_h)
 
+    # iterate over each sample together with its one-hot label
+    for dataRow, oneHotRow in zip(data, oneHot):
         a0 = dataRow  # the data are the input of the first layer
         z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
         a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
         z2 = np.matmul(a1, w2) + b2  # input of the output layer
         a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
-        predictionsRow = a2  # the predicted values are the outputs of the output layer
-        dCdz2 = predictionsRow - oneHotLabel
-        
-		# sum the contribution of each data for the w2  updating
-        for l in range( d_h ):
-            for m in range( d_out ):
-                dCdw2[l][m] += ( 
-                    dCdz2[l] 
-                    * a1[m] )
-                    
-		# sum the contribution of each data for the b2  updating
-        for l in range( d_out ):
-            dCdb2[0][l] += ( 
-                dCdz2[l] 
-			)
-		# sum the contribution of each data for the w1  updating			
-        for l in range( d_in ):
-            for m in range( d_h ) :
-                for j in range( d_out ):
-                    dCdw1[l][m] += (
-                        dCdz2[j]
-                        * w2[j][l]
-                        * a1[l]
-                        * (1 - a1[l])
-                        * a0[m]
-                    )
-		# sum the contribution of each data for the b1  updating
-        for l in range( d_h ):
-            for j in range( d_out ):
-                dCdb1[0][l] += (
-                    dCdz2[j]  
-                    * w2[j][l] 
-                    * a1[l] 
-                    * (1 - a1[l])
-                )
-
-	#Average value of each data contribution
+        # the predicted values are the outputs of the output layer
+        dCdz2 = a2 - oneHotRow
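+        # For sigmoid outputs with the cross-entropy cost, the sigmoid derivative
+        # cancels out:  d/dz2 [-y*log(a2) - (1-y)*log(1-a2)] = a2 - y,
+        # so no a2 * (1 - a2) factor appears here (unlike the MSE version above).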
+
+        # sum each sample's contribution to the w2 update
+        for r in range(d_h):
+            for c in range(d_out):
+                dCdw2[r][c] += dCdz2[c] * a1[r]
+
+        # sum each sample's contribution to the b2 update
+        for r in range(d_out):
+            dCdb2[r] += dCdz2[r]
+
+        # sum each sample's contribution to the w1 update
+        for r in range(d_in):
+            for c in range(d_h):
+                for j in range(d_out):
+                    dCdw1[r][c] += dCdz2[j] * w2[c][j] * a1[c] * (1 - a1[c]) * a0[r]
+
+        # sum each sample's contribution to the b1 update
+        for r in range(d_h):
+            for j in range(d_out):
+                dCdb1[r] += dCdz2[j] * w2[r][j] * a1[r] * (1 - a1[r])
+
+    # Average the accumulated gradients over the N samples
     dCdw1 = dCdw1 / N
     dCdb1 = dCdb1 / N
     dCdw2 = dCdw2 / N
     dCdb2 = dCdb2 / N
-    
-	#Arrays update
+
+    # Update the weights and biases
     w1 -= learning_rate * dCdw1
     b1 -= learning_rate * dCdb1
     w2 -= learning_rate * dCdw2
@@ -188,14 +174,53 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
 
     # realizing a new network interaction with new values
     a0 = data  # the data are the input of the first layer
-    new_z1 = np.matmul(a0, new_w1) + new_b1  # input of the hidden layer
-    new_a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
-    z2 = np.matmul(new_a1, new_w2) + new_b2  # input of the output layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
     a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
     predictions = a2  # the predicted values are the outputs of the output layer
 
     # Compute loss (Entropy Loss)
-    
-	loss = np.mean( ( -1 * oneHot * np.log( predictions ) ) - ( 1 - oneHot ) * np.log( 1 - predictions ) )
+
+    loss = np.mean(
+        (-oneHot * np.log(predictions)) - (1 - oneHot) * np.log(1 - predictions)
+    )
 
     return w1, b1, w2, b2, loss
+
+
+def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch):
+    """Train the network for num_epoch epochs with learn_once_cross_entropy
+    and record the training accuracy after each epoch."""
+
+    train_accuracies = []
+    for i in range(num_epoch):
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(
+            w1, b1, w2, b2, data, labels_train, learning_rate
+        )
+        # forward pass with the updated parameters to measure the training accuracy
+        a1 = 1 / (1 + np.exp(-(np.matmul(data, w1) + b1)))
+        a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))
+        train_accuracies.append(np.mean(np.argmax(a2, axis=1) == labels_train))
+    return w1, b1, w2, b2, train_accuracies
+
+
+################################################################################
+# RANDOM TEST
+################################################################################
+N = 30  # number of input samples
+d_in = 3  # input dimension
+d_h = 5  # number of neurons in the hidden layer
+d_out = 10  # output dimension (number of neurons of the output layer)
+
+# Random initialization of the network weights and biases
+w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+b1 = np.zeros(d_h)  # first layer biases
+w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+b2 = np.zeros(d_out)  # second layer biases
+
+data = np.random.rand(N, d_in)  # create random input data
+# targets = np.random.rand(N, d_out)  # create random targets
+labels_train = np.random.randint(3, size=N)  # create random class labels (values 0-2)
+# print(w1, b1, w2, b2)
+# print('NEW VALUES')
+w1, b1, w2, b2, loss = learn_once_cross_entropy(
+    w1, b1, w2, b2, data, labels_train, 0.01
+)
+# print(w1, b1, w2, b2, loss)
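+
+# Sketch of a full training run on the same random data (epoch count chosen arbitrarily):
+# w1, b1, w2, b2, train_accuracies = train_mlp(
+#     w1, b1, w2, b2, data, labels_train, 0.01, 10
+# )
+# print(train_accuracies)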