final

aba7e23b · Muniz Silva Samuel · f3414489 · aba7e23b
Commit aba7e23b authored Nov 1, 2022 by Muniz Silva Samuel
--- a/mlp.py
+++ b/mlp.py
 import numpy as np
-import tensorflow as tf
 import pandas as pd
-def sigm(x):
-  y = 1 / (1 + np.exp(-x))
-  return y
-def learn_once_mse(w1,b1,w2,b2,data,targests,learning_rate):
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    """Perform one gradient-descent step on a 2-layer sigmoid network (MSE cost).

    The network computes ``a1 = sigmoid(data @ w1 + b1)`` and
    ``a2 = sigmoid(a1 @ w2 + b2)``. The step follows the gradient of the
    per-sample squared error ``sum_k (a2_k - target_k) ** 2`` averaged over
    the batch. The returned loss is ``np.mean`` over every element, so the
    step is ``d_out`` times the gradient of the reported loss.

    Args:
        w1: First-layer weights, shape ``(d_in, d_h)``.
        b1: First-layer biases, shape ``(1, d_h)`` or ``(d_h,)``.
        w2: Second-layer weights, shape ``(d_h, d_out)``.
        b2: Second-layer biases, shape ``(1, d_out)`` or ``(d_out,)``.
        data: Input batch, shape ``(N, d_in)``.
        targets: Expected outputs, shape ``(N, d_out)``.
        learning_rate: Step size of the gradient descent.

    Returns:
        Tuple ``(w1, b1, w2, b2, loss)``: the updated parameters (new arrays,
        the inputs are left untouched) and the MSE loss evaluated with the
        updated parameters.

    Raises:
        ValueError: If the batch is empty or ``targets`` does not have the
            same shape as the network output.
    """
    w1 = np.asarray(w1, dtype=float)
    b1 = np.asarray(b1, dtype=float)
    w2 = np.asarray(w2, dtype=float)
    b2 = np.asarray(b2, dtype=float)
    data = np.asarray(data, dtype=float)
    targets = np.asarray(targets, dtype=float)

    n_samples = targets.shape[0] if targets.ndim else 0
    if n_samples == 0:
        raise ValueError("learn_once_mse needs at least one sample")

    # Forward pass on the whole batch at once.
    a0 = data  # the data are the input of the first layer
    a1 = 1 / (1 + np.exp(-(np.matmul(a0, w1) + b1)))  # hidden layer output
    a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))  # output layer output
    if a2.shape != targets.shape:
        raise ValueError(
            f"targets has shape {targets.shape}, expected {a2.shape}"
        )

    # Backward pass: delta_k is dC/dz_k for every sample (one row per sample).
    delta2 = 2 * (a2 - targets) * a2 * (1 - a2)
    delta1 = np.matmul(delta2, w2.T) * a1 * (1 - a1)

    # Average the per-sample contributions over the batch.
    grad_w2 = np.matmul(a1.T, delta2) / n_samples
    grad_b2 = delta2.sum(axis=0) / n_samples
    grad_w1 = np.matmul(a0.T, delta1) / n_samples
    grad_b1 = delta1.sum(axis=0) / n_samples

    # Build new arrays instead of using ``-=`` so the caller's parameters are
    # not modified behind its back; biases keep whatever shape they came in.
    w1 = w1 - learning_rate * grad_w1
    b1 = b1 - learning_rate * grad_b1.reshape(b1.shape)
    w2 = w2 - learning_rate * grad_w2
    b2 = b2 - learning_rate * grad_b2.reshape(b2.shape)

    # New forward pass with the updated parameters to report the loss.
    new_a1 = 1 / (1 + np.exp(-(np.matmul(a0, w1) + b1)))
    predictions = 1 / (1 + np.exp(-(np.matmul(new_a1, w2) + b2)))

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    return w1, b1, w2, b2, loss
 def one_hot(labels):
+    """Returns the 2d array with binary vectors with the 1's in the respective position of the sort matrix"""
    oneHotMat = np.zeros((labels.size, labels.size), dtype=int)
    for index, values in enumerate(labels):
        oneHotMat[index, values] = 1
    return oneHotMat
-def learn_once_cross_entropy():
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    """Perform one gradient-descent step on a 2-layer sigmoid network (cross-entropy).

    The network computes ``a1 = sigmoid(data @ w1 + b1)`` and
    ``a2 = sigmoid(a1 @ w2 + b2)``. Labels are one-hot encoded and the cost
    is the binary cross-entropy of each output unit, whose gradient with
    respect to the output pre-activation is ``a2 - one_hot``. The step uses
    the per-sample cost summed over the output units and averaged over the
    batch. The returned loss is ``np.mean`` over every element, so the step
    is ``d_out`` times the gradient of the reported loss.

    Args:
        w1: First-layer weights, shape ``(d_in, d_h)``.
        b1: First-layer biases, shape ``(1, d_h)`` or ``(d_h,)``.
        w2: Second-layer weights, shape ``(d_h, d_out)``.
        b2: Second-layer biases, shape ``(1, d_out)`` or ``(d_out,)``.
        data: Input batch, shape ``(N, d_in)``.
        labels_train: ``N`` integer class indices in ``[0, d_out)``.
        learning_rate: Step size of the gradient descent.

    Returns:
        Tuple ``(w1, b1, w2, b2, loss)``: the updated parameters (new arrays,
        the inputs are left untouched) and the cross-entropy loss evaluated
        with the updated parameters.

    Raises:
        ValueError: If the batch is empty, if ``data`` and ``labels_train``
            disagree on the number of samples, or if a label is outside
            ``[0, d_out)``.
    """
    w1 = np.asarray(w1, dtype=float)
    b1 = np.asarray(b1, dtype=float)
    w2 = np.asarray(w2, dtype=float)
    b2 = np.asarray(b2, dtype=float)
    data = np.asarray(data, dtype=float)
    labels = np.asarray(labels_train, dtype=int).ravel()

    n_samples = labels.size
    d_out = w2.shape[1]
    if n_samples == 0:
        raise ValueError("learn_once_cross_entropy needs at least one sample")
    if data.shape[0] != n_samples:
        raise ValueError(
            f"got {data.shape[0]} data rows for {n_samples} labels"
        )
    if labels.min() < 0 or labels.max() >= d_out:
        raise ValueError(f"labels must lie in [0, {d_out})")

    # Build the one-hot targets with exactly d_out columns so they always
    # line up with the network output, whatever the batch size is.
    one_hot_targets = np.zeros((n_samples, d_out))
    one_hot_targets[np.arange(n_samples), labels] = 1

    # Forward pass on the whole batch at once.
    a0 = data  # the data are the input of the first layer
    a1 = 1 / (1 + np.exp(-(np.matmul(a0, w1) + b1)))  # hidden layer output
    a2 = 1 / (1 + np.exp(-(np.matmul(a1, w2) + b2)))  # output layer output

    # Backward pass: delta_k is dC/dz_k for every sample (one row per sample).
    delta2 = a2 - one_hot_targets
    delta1 = np.matmul(delta2, w2.T) * a1 * (1 - a1)

    # Average the per-sample contributions over the batch.
    grad_w2 = np.matmul(a1.T, delta2) / n_samples
    grad_b2 = delta2.sum(axis=0) / n_samples
    grad_w1 = np.matmul(a0.T, delta1) / n_samples
    grad_b1 = delta1.sum(axis=0) / n_samples

    # Build new arrays instead of using ``-=`` so the caller's parameters are
    # not modified behind its back; biases keep whatever shape they came in.
    w1 = w1 - learning_rate * grad_w1
    b1 = b1 - learning_rate * grad_b1.reshape(b1.shape)
    w2 = w2 - learning_rate * grad_w2
    b2 = b2 - learning_rate * grad_b2.reshape(b2.shape)

    # New forward pass with the updated parameters to report the loss.
    new_a1 = 1 / (1 + np.exp(-(np.matmul(a0, w1) + b1)))
    predictions = 1 / (1 + np.exp(-(np.matmul(new_a1, w2) + b2)))

    # Compute loss (cross-entropy). Clip so that a saturated output (exactly
    # 0 or 1 in floating point) gives a large finite loss instead of inf/nan.
    tiny = np.finfo(float).eps
    clipped = np.clip(predictions, tiny, 1 - tiny)
    loss = np.mean(
        -one_hot_targets * np.log(clipped)
        - (1 - one_hot_targets) * np.log(1 - clipped)
    )
    return w1, b1, w2, b2, loss