From aba7e23be545ce64a4128efc37c7f83847002e36 Mon Sep 17 00:00:00 2001
From: Muniz Silva Samuel <samuel.muniz-silva@ecl21.ec-lyon.fr>
Date: Tue, 1 Nov 2022 23:45:52 +0000
Subject: [PATCH] final

---
 mlp.py | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 188 insertions(+), 13 deletions(-)

diff --git a/mlp.py b/mlp.py
index 160c3d2..5a91c0d 100644
--- a/mlp.py
+++ b/mlp.py
@@ -1,26 +1,201 @@
 import numpy as np
-import tensorflow as tf
 import pandas as pd
 
-def sigm(x):
-    y = 1 / (1 + np.exp(-x))
-    return y
 
-def learn_once_mse(w1,b1,w2,b2,data,targests,learning_rate):
-
-    return w1,b1,w2,b2,loss
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+    """Take the arrays w1, b1, w2, b2 of a 2-layer neural network,
+    update them with one gradient-descent step and return them together
+    with the average loss computed with the MSE method."""
+
+    d_in, d_h = w1.shape     # layer dimensions, used to size the gradient arrays
+    N, d_out = targets.shape
+
+    # Gradient accumulators for w2, b2, w1, b1
+    dCdw2 = np.zeros((d_h, d_out))
+    dCdb2 = np.zeros((1, d_out))
+    dCdw1 = np.zeros((d_in, d_h))
+    dCdb1 = np.zeros((1, d_h))
+
+    # Take each sample together with its respective targets
+    for dataRow, targetsRow in zip(data, targets):
+        a0 = dataRow                            # the sample is the input of the first layer
+        z1 = (np.matmul(a0, w1) + b1).ravel()   # pre-activation of the hidden layer
+        a1 = 1 / (1 + np.exp(-z1))              # output of the hidden layer (sigmoid)
+        z2 = (np.matmul(a1, w2) + b2).ravel()   # pre-activation of the output layer
+        a2 = 1 / (1 + np.exp(-z2))              # output of the output layer (sigmoid)
+        predictionsRow = a2                     # the predicted values are the outputs of the output layer
+
+        # Partial derivative of the cost with respect to each network output
+        dCda = 2 * (predictionsRow - targetsRow)
+
+        # Sum the contribution of each sample to the w2 update
+        # (w2 has shape (d_h, d_out): l indexes the hidden unit, m the output unit)
+        for l in range(d_h):
+            for m in range(d_out):
+                dCdw2[l][m] += dCda[m] * a2[m] * (1 - a2[m]) * a1[l]
+
+        # Sum the contribution of each sample to the b2 update
+        for l in range(d_out):
+            dCdb2[0][l] += dCda[l] * a2[l] * (1 - a2[l])
+
+        # Sum the contribution of each sample to the w1 update
+        # (w1 has shape (d_in, d_h): l indexes the input, m the hidden unit)
+        for l in range(d_in):
+            for m in range(d_h):
+                for j in range(d_out):
+                    dCdw1[l][m] += (
+                        dCda[j]
+                        * a2[j]
+                        * (1 - a2[j])
+                        * w2[m][j]
+                        * a1[m]
+                        * (1 - a1[m])
+                        * a0[l]
+                    )
+
+        # Sum the contribution of each sample to the b1 update
+        for l in range(d_h):
+            for j in range(d_out):
+                dCdb1[0][l] += (
+                    dCda[j]
+                    * a2[j]
+                    * (1 - a2[j])
+                    * w2[l][j]
+                    * a1[l]
+                    * (1 - a1[l])
+                )
+
+    # Average the contribution of each sample
+    dCdw1 = dCdw1 / N
+    dCdb1 = dCdb1 / N
+    dCdw2 = dCdw2 / N
+    dCdb2 = dCdb2 / N
+
+    # Parameter update
+    w1 -= learning_rate * dCdw1
+    b1 -= learning_rate * dCdb1
+    w2 -= learning_rate * dCdw2
+    b2 -= learning_rate * dCdb2
+
+    # New forward pass over the whole batch with the updated parameters
+    a0 = data                      # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1    # pre-activation of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))     # output of the hidden layer
+    z2 = np.matmul(a1, w2) + b2    # pre-activation of the output layer
+    a2 = 1 / (1 + np.exp(-z2))     # output of the output layer
+    predictions = a2               # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+
+    return w1, b1, w2, b2, loss
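+
+
+# A vectorized sketch of the same gradients, for reference. It assumes
+# full-batch activations a0 = data, a1, a2 computed as in the forward pass
+# above; each line reproduces what the per-sample loops accumulate:
+#
+#   dCdz2 = 2 * (a2 - targets) * a2 * (1 - a2)        # (N, d_out)
+#   dCdw2 = np.matmul(a1.T, dCdz2) / N                # (d_h, d_out)
+#   dCdb2 = dCdz2.mean(axis=0, keepdims=True)         # (1, d_out)
+#   dCdz1 = np.matmul(dCdz2, w2.T) * a1 * (1 - a1)    # (N, d_h)
+#   dCdw1 = np.matmul(a0.T, dCdz1) / N                # (d_in, d_h)
+#   dCdb1 = dCdz1.mean(axis=0, keepdims=True)         # (1, d_h)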
+
+
 def one_hot(labels):
-    oneHotMat = np.zeros((labels.size,labels.size),dtype=int)
-    for index,values in enumerate(labels):
-        oneHotMat[index,values] = 1
-    return oneHotMat
+    """Return a 2-D array of one-hot rows: row i holds a single 1 in the
+    column given by labels[i]."""
+    oneHotMat = np.zeros((labels.size, labels.max() + 1), dtype=int)
+
+    for index, value in enumerate(labels):
+        oneHotMat[index, value] = 1
+
+    return oneHotMat
+
+
-def learn_once_cross_entropy():
-
-    return
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    """Take the arrays w1, b1, w2, b2 of a 2-layer neural network,
+    update them with one gradient-descent step and return them together
+    with the average loss computed with the cross-entropy method."""
+
+    d_in, d_h = w1.shape            # layer dimensions, used to size the gradient arrays
+    oneHot = one_hot(labels_train)  # one-hot encoding of the integer labels
+    N, d_out = oneHot.shape
+
+    # Gradient accumulators for w2, b2, w1, b1
+    dCdw2 = np.zeros((d_h, d_out))
+    dCdb2 = np.zeros((1, d_out))
+    dCdw1 = np.zeros((d_in, d_h))
+    dCdb1 = np.zeros((1, d_h))
+
+    # Take each sample together with its respective one-hot label
+    for dataRow, oneHotLabel in zip(data, oneHot):
+        a0 = dataRow                            # the sample is the input of the first layer
+        z1 = (np.matmul(a0, w1) + b1).ravel()   # pre-activation of the hidden layer
+        a1 = 1 / (1 + np.exp(-z1))              # output of the hidden layer (sigmoid)
+        z2 = (np.matmul(a1, w2) + b2).ravel()   # pre-activation of the output layer
+        a2 = 1 / (1 + np.exp(-z2))              # output of the output layer (sigmoid)
+        predictionsRow = a2                     # the predicted values are the outputs of the output layer
+
+        # For the cross-entropy cost the output-layer delta simplifies to a2 - y
+        dCdz2 = predictionsRow - oneHotLabel
+
+        # Sum the contribution of each sample to the w2 update
+        for l in range(d_h):
+            for m in range(d_out):
+                dCdw2[l][m] += dCdz2[m] * a1[l]
+
+        # Sum the contribution of each sample to the b2 update
+        for l in range(d_out):
+            dCdb2[0][l] += dCdz2[l]
+
+        # Sum the contribution of each sample to the w1 update
+        for l in range(d_in):
+            for m in range(d_h):
+                for j in range(d_out):
+                    dCdw1[l][m] += (
+                        dCdz2[j]
+                        * w2[m][j]
+                        * a1[m]
+                        * (1 - a1[m])
+                        * a0[l]
+                    )
+
+        # Sum the contribution of each sample to the b1 update
+        for l in range(d_h):
+            for j in range(d_out):
+                dCdb1[0][l] += (
+                    dCdz2[j]
+                    * w2[l][j]
+                    * a1[l]
+                    * (1 - a1[l])
+                )
+
+    # Average the contribution of each sample
+    dCdw1 = dCdw1 / N
+    dCdb1 = dCdb1 / N
+    dCdw2 = dCdw2 / N
+    dCdb2 = dCdb2 / N
+
+    # Parameter update
+    w1 -= learning_rate * dCdw1
+    b1 -= learning_rate * dCdb1
+    w2 -= learning_rate * dCdw2
+    b2 -= learning_rate * dCdb2
+
+    # New forward pass over the whole batch with the updated parameters
+    a0 = data                      # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1    # pre-activation of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))     # output of the hidden layer
+    z2 = np.matmul(a1, w2) + b2    # pre-activation of the output layer
+    a2 = 1 / (1 + np.exp(-z2))     # output of the output layer
+    predictions = a2               # the predicted values are the outputs of the output layer
+
+    # Compute loss (binary cross-entropy, averaged over the batch)
+    loss = np.mean(
+        -oneHot * np.log(predictions) - (1 - oneHot) * np.log(1 - predictions)
+    )
+
-print(one_hot(np.array([1,2,0,4,3])))
\ No newline at end of file
+    return w1, b1, w2, b2, loss
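+
+
+# A minimal smoke-test sketch, guarded so it only runs when the module is
+# executed directly. The shapes and the learning rate below are illustrative
+# assumptions, not values fixed by the exercise; a few steps of
+# learn_once_mse on small random data should make the returned loss shrink.
+if __name__ == "__main__":
+    rng = np.random.default_rng(0)
+    w1 = rng.normal(scale=0.1, size=(4, 8))   # d_in = 4, d_h = 8 (assumed)
+    b1 = np.zeros((1, 8))
+    w2 = rng.normal(scale=0.1, size=(8, 3))   # d_out = 3 (assumed)
+    b2 = np.zeros((1, 3))
+    data = rng.normal(size=(10, 4))           # N = 10 samples
+    targets = rng.uniform(size=(10, 3))       # sigmoid outputs live in (0, 1)
+    for _ in range(5):
+        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
+        print(loss)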
-- 
GitLab