diff --git a/mlp.py b/mlp.py
index aa626d038348f6c33a0f76efee44b95ca5d0470b..1f236a4619e40e5666d923e881a884e49b8e1950 100644
--- a/mlp.py
+++ b/mlp.py
@@ -191,36 +191,93 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
 
 
 def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch):
+    """The function train_mlp takes as parameters:
+    w1, b1, w2 and b2 the weights and biases of the network,
+    data a matrix of shape (batch_size x d_in),
+    labels_train a vector of size batch_size,
+    learning_rate the learning rate, and
+    num_epoch the number of training epochs.
+    It performs num_epoch training steps and returns:
+    w1, b1, w2 and b2 the updated weights and biases of the network,
+    train_accuracies the list of train accuracies across epochs as a list of floats."""
+
+    (
+        d_in,
+        d_h,
+    ) = w1.shape  # extract the dimensions needed to size the gradient arrays
+    N = labels_train.shape[0]
+    d_out = b2.shape[0]
+    oneHot = one_hot(labels_train)
+    train_accuracies = []  # list of train accuracies, one per epoch
-    train_accuracies = []
     for i in range(num_epoch):
-        w1, b1, w2, b2, loss = learn_once_cross_entropy(
-            w1, b1, w2, b2, data, labels_train, learning_rate
-        )
-        train_accuracies.append(loss)
-    return w1, b1, w2, b2, train_accuracies
+        # initialize the gradient accumulators for w2, b2, w1 and b1
+        dCdw2 = np.zeros((d_h, d_out))
+        dCdb2 = np.zeros(d_out)
+        dCdw1 = np.zeros((d_in, d_h))
+        dCdb1 = np.zeros(d_h)
+
+        # iterate over each sample together with its one-hot label
+        for dataRow, oneHotRow in zip(data, oneHot):
+            a0 = dataRow  # the data are the input of the first layer
+            z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+            a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+            z2 = np.matmul(a1, w2) + b2  # input of the output layer
+            a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
+            # the predicted values are the outputs of the output layer
+            dCdz2 = a2 - oneHotRow
+
+            # accumulate the contribution of this sample to the w2 gradient
+            for r in range(d_h):
+                for c in range(d_out):
+                    dCdw2[r][c] += dCdz2[c] * a1[r]
+
+            # accumulate the contribution of this sample to the b2 gradient
+            for r in range(d_out):
+                dCdb2[r] += dCdz2[r]
+
+            # accumulate the contribution of this sample to the w1 gradient
+            for r in range(d_in):
+                for c in range(d_h):
+                    for j in range(d_out):
+                        dCdw1[r][c] += dCdz2[j] * w2[c][j] * a1[c] * (1 - a1[c]) * a0[r]
+
+            # accumulate the contribution of this sample to the b1 gradient
+            for r in range(d_h):
+                for j in range(d_out):
+                    dCdb1[r] += dCdz2[j] * w2[r][j] * a1[r] * (1 - a1[r])
+
+        # average the contributions over the N samples
+        dCdw1 = dCdw1 / N
+        dCdb1 = dCdb1 / N
+        dCdw2 = dCdw2 / N
+        dCdb2 = dCdb2 / N
+
+        # gradient-descent update of the weights and biases
+        w1 -= learning_rate * dCdw1
+        b1 -= learning_rate * dCdb1
+        w2 -= learning_rate * dCdw2
+        b2 -= learning_rate * dCdb2
+
+        # run a new forward pass on the whole batch with the updated parameters
+        a0 = data  # the data are the input of the first layer
+        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+        a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+        z2 = np.matmul(a1, w2) + b2  # input of the output layer
+        a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
+        predictions = a2  # the predicted values are the outputs of the output layer
+        accuracy = 0  # number of correctly classified samples in this epoch
+        for predictionRow, oneHotRow in zip(
+            predictions, oneHot
+        ):  # compare the predicted values with the true one-hot labels
+            for index, value in enumerate(predictionRow):
+                # check whether the maximum of the output layer is at the same position as the one-hot label
+                if value == max(predictionRow) and oneHotRow[index] == 1:
+                    # if so, the classification is right and the counter is incremented
+                    accuracy += 1
+        accuracy = accuracy / N  # ratio of right answers over the total number of samples
+        # add this epoch's accuracy to the train_accuracies list
+        train_accuracies.append(accuracy)
-################################################################################
-# RANDOM TEST
-######################################################################################
-N = 30  # number of input data
-d_in = 3  # input dimension
-d_h = 5  # number of neurons in the hidden layer
-d_out = 10  # output dimension (number of neurons of the output layer)
-
-# Random initialization of the network weights and biaises
-w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
-b1 = np.zeros(d_h)  # first layer biaises
-w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
-b2 = np.zeros(d_out)  # second layer biaises
-
-data = np.random.rand(N, d_in)  # create a random data
-# targets = np.random.rand(N, d_out)  # create a random targets
-labels_train = np.random.randint(3, size=N)  # create a random targets
-# print(w1, b1, w2, b2)
-# print('NEW VALUES')
-w1, b1, w2, b2, loss = learn_once_cross_entropy(
-    w1, b1, w2, b2, data, labels_train, 0.01
-)
-# print(w1, b1, w2, b2, loss)
+    return w1, b1, w2, b2, train_accuracies
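
For reference, here is a minimal usage sketch in the spirit of the RANDOM TEST block removed above. It is an illustration, not part of the diff: it assumes it runs with `train_mlp` and the `one_hot` helper in scope (e.g. at the bottom of mlp.py), and that `one_hot` returns an (N, d_out) matrix for integer labels in [0, d_out).

```python
import numpy as np

N, d_in, d_h, d_out = 30, 3, 5, 10  # toy sizes, same as the removed RANDOM TEST block

# random initialization of the network weights and biases
w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
b1 = np.zeros(d_h)  # first layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
b2 = np.zeros(d_out)  # second layer biases

data = np.random.rand(N, d_in)  # random input batch
# labels in [0, d_out) so that one_hot (assumed to yield N x d_out) matches b2
labels_train = np.random.randint(d_out, size=N)

w1, b1, w2, b2, train_accuracies = train_mlp(
    w1, b1, w2, b2, data, labels_train, 0.01, 5
)
print(train_accuracies)  # one training accuracy per epoch
```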