diff --git a/knn.py b/knn.py
index 23f64f3ee1d67c334d5300de7a604cd8bd1c33c0..1dc2048c3ec717abc87062e60148a8088a39009b 100644
--- a/knn.py
+++ b/knn.py
@@ -58,7 +58,7 @@ if __name__== '__main__':
     plt.title("Accuracy=f(k)")
     plt.xlabel("k")
     plt.ylabel("Accuracy") 
-    plt.savefig('C:\\Users\\LENOVO\\Desktop\\deeplearning\\BE1 - Image Classification\\image-classification\\results')  
+    plt.savefig('C:\\Users\\LENOVO\\Desktop\\deeplearning\\BE1 - Image Classification\\image-classification\\results\\knn.png')  
     plt.show() 
     
 
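Note on the savefig fix above: it still hard-codes an absolute Windows path. A minimal sketch of a more portable way to build the same results/knn.png destination, assuming the script is run from the repository root; the results_dir variable below is illustrative and not part of the patch:

import os
import matplotlib.pyplot as plt

results_dir = "results"                              # relative to the repository root
os.makedirs(results_dir, exist_ok=True)              # create the folder if it does not exist yet
plt.savefig(os.path.join(results_dir, "knn.png"))    # same knn.png output, portable path
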
diff --git a/mlp.py b/mlp.py
index 77dc772a4c987640b6001611dcbb692a78404fcf..cf892959dc561a2ebafe94ea42aa44754c2c51dc 100644
--- a/mlp.py
+++ b/mlp.py
@@ -9,7 +9,7 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
     a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = np.exp(z2)/np.sum(z2)  # output of the output layer (softmax activation function)
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
 
     # Compute loss (MSE)
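Note on the hunk above: the removed line aimed at a softmax but divided np.exp(z2) by np.sum(z2), i.e. by the sum of the raw pre-activations over the whole batch rather than by the per-row sum of exponentials; the patch switches this MSE variant to a sigmoid output instead. For comparison, a minimal sketch of a row-wise, numerically stable softmax (not part of the patch):

import numpy as np

def softmax(z):
    # shift by the row-wise max for numerical stability, then normalize each row
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
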
@@ -64,17 +64,17 @@ def learn_once_cross_entropy(w1,b1,w2,b2,data,labels_train,learning_rate):
     predictions = a2  # the predicted values are the outputs of the output layer
 
     # Compute loss (Binary X-entropy)
-    loss = - np.sum(one_hot_labels*np.log(predictions)+(1-one_hot_labels)*np.log(1-predictions))/N
+    loss = - np.sum(one_hot_labels*np.log(predictions)+(1-one_hot_labels)*np.log(1-predictions))
 
     # Backward pass
 
     dz2= a2-one_hot_labels
-    dw2=np.dot(np.transpose(a1),dz2)
+    dw2=np.dot(np.transpose(a1),dz2)/N
     db2=dz2
 
     da1=np.dot(dz2,np.transpose(w2))
     dz1=da1*a1*(1-a1)
-    dw1=np.dot(np.transpose(a0),dz1)
+    dw1=np.dot(np.transpose(a0),dz1)/N
     db1=dz1
 
     w1 -=learning_rate*dw1
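Note on the hunk above: the patch moves the 1/N batch averaging from the loss value into the weight gradients dw2 and dw1, while the surrounding bias gradients (db2 = dz2, db1 = dz1) still keep one row per sample. A minimal sketch of the same backward pass with every gradient averaged over the N samples, reusing a0, a1, a2, w2, one_hot_labels and N from learn_once_cross_entropy and assuming b1 and b2 are stored as (1, n_hidden) and (1, n_out) row vectors; an illustration of the convention, not part of the patch:

dz2 = a2 - one_hot_labels                       # (N, n_out)
dw2 = np.dot(a1.T, dz2) / N                     # (n_hidden, n_out)
db2 = np.sum(dz2, axis=0, keepdims=True) / N    # (1, n_out)
da1 = np.dot(dz2, w2.T)
dz1 = da1 * a1 * (1 - a1)                       # derivative of the sigmoid hidden layer
dw1 = np.dot(a0.T, dz1) / N                     # (n_in, n_hidden)
db1 = np.sum(dz1, axis=0, keepdims=True) / N    # (1, n_hidden)
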
@@ -88,17 +88,19 @@ def learn_once_cross_entropy(w1,b1,w2,b2,data,labels_train,learning_rate):
 
 #Q13
 def train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch):
-    #encoding one hot labels
-    one_hot_labels = one_hot(labels_train)
+    
+    one_hot_labels = one_hot(labels_train)  # encoding one-hot labels
     N,_=np.shape(data_train)
     train_accuracies=[]
     for i in range(num_epoch):
+        
         w1,b1,w2,b2, loss, predictions= learn_once_cross_entropy(w1,b1,w2,b2,data_train,labels_train,learning_rate)
         # predictions is a matrix of probabilities; we put a 1 at the largest probability of each individual and 0 elsewhere
         maxi=np.max(predictions,1)
         predictions_zeros_ones=np.floor(predictions/maxi[:, np.newaxis]).astype(int)
-        A=np.sum(one_hot_labels==predictions_zeros_ones)
+        A=np.sum(np.all(one_hot_labels==predictions_zeros_ones,axis=1))
         train_accuracies.append(A/N)
+        print(i,A/N)
 
     return w1,b1,w2,b2, train_accuracies
 
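Note on the hunk above: the accuracy fix counts a sample as correct only when its entire one-hot row matches, instead of summing element-wise matches over the whole matrix. An equivalent and slightly simpler formulation compares class indices directly; a minimal sketch, assuming labels_train holds integer class indices (as the call to one_hot suggests), not part of the patch:

predicted_classes = np.argmax(predictions, axis=1)     # index of the highest probability per sample
accuracy = np.mean(predicted_classes == labels_train)  # fraction of correctly classified samples
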
@@ -119,9 +121,12 @@ def test_mlp(w1,b1,w2,b2,data_test,labels_test):
     N,_=np.shape(data_test)
     maxi=np.max(predictions,1)
     predictions_zeros_ones=np.floor(predictions/maxi[:, np.newaxis]).astype(int)
-    A=np.sum(one_hot_labels==predictions_zeros_ones)
+    V=np.all(one_hot_labels==predictions_zeros_ones,axis=1)
+    A=np.sum(V)   
     test_accuracy=A/N
 
+    print('test',A/N)
+
     return test_accuracy
 
 #Q15
diff --git a/results/mpl.png b/results/mpl.png
index 643b74238983a35fd944f52f4e934a3f879781eb..db6a68ab85fb9980bc0eb134dcc007af10a2c117 100644
Binary files a/results/mpl.png and b/results/mpl.png differ