Skip to content
Snippets Groups Projects
Commit f9861327 authored by Muniz Silva Samuel's avatar Muniz Silva Samuel
Browse files

final commit

parent 8f193998
Branches
No related tags found
No related merge requests found
......@@ -191,36 +191,93 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch):
"""the function train_mlp taking as parameters:
w1, b1, w2 and b2 the weights and biases of the network,
data_train a matrix of shape (batch_size x d_in),
labels_train a vector of size batch_size,
learning_rate the learning rate, and
num_epoch the number of training epoch,
that perform num_epoch of training steps and returns:
w1, b1, w2 and b2 the updated weights and biases of the network,
train_accuracies the list of train accuracies across epochs as a list of floats."""
(
d_in,
d_h,
) = w1.shape # extracts the dimensions of the variables to define future np.arrays
N = labels_train.shape[0]
d_out = b2.shape[0]
oneHot = one_hot(labels_train)
train_accuracies = [] # create the list of accuracy
train_accuracies = []
for i in range(num_epoch):
w1, b1, w2, b2, loss = learn_once_cross_entropy(
w1, b1, w2, b2, data, labels_train, learning_rate
)
train_accuracies.append(loss)
return w1, b1, w2, b2, train_accuracies
# Create the gradient for the variables w2,b2,w1,b1
dCdw2 = np.zeros((d_h, d_out))
dCdb2 = np.zeros(d_out)
dCdw1 = np.zeros((d_in, d_h))
dCdb1 = np.zeros(d_h)
################################################################################
# RANDOM TEST
######################################################################################
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 5 # number of neurons in the hidden layer
d_out = 10 # output dimension (number of neurons of the output layer)
# Random initialization of the network weights and biaises
w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
b1 = np.zeros(d_h) # first layer biaises
w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
b2 = np.zeros(d_out) # second layer biaises
data = np.random.rand(N, d_in) # create a random data
# targets = np.random.rand(N, d_out) # create a random targets
labels_train = np.random.randint(3, size=N) # create a random targets
# print(w1, b1, w2, b2)
# print('NEW VALUES')
w1, b1, w2, b2, loss = learn_once_cross_entropy(
w1, b1, w2, b2, data, labels_train, 0.01
)
# print(w1, b1, w2, b2, loss)
# take each data with its respective labels
for dataRow, oneHotRow in zip(data, oneHot):
a0 = dataRow # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (1 + np.exp(-z2)) # output of the output layer
# the predicted values are the outputs of the output layer
dCdz2 = a2 - oneHotRow
# sum the contribution of each data for the w2 updating
for r in range(d_h):
for c in range(d_out):
dCdw2[r][c] += dCdz2[c] * a1[r]
# sum the contribution of each data for the b2 updating
for r in range(d_out):
dCdb2[r] += dCdz2[r]
# sum the contribution of each data for the w1 updating
for r in range(d_in):
for c in range(d_h):
for j in range(d_out):
dCdw1[r][c] += dCdz2[j] * w2[c][j] * a1[c] * (1 - a1[c]) * a0[r]
# sum the contribution of each data for the b1 updating
for r in range(d_h):
for j in range(d_out):
dCdb1[r] += dCdz2[j] * w2[r][j] * a1[r] * (1 - a1[r])
# Average value of each data contribution
dCdw1 = dCdw1 / N
dCdb1 = dCdb1 / N
dCdw2 = dCdw2 / N
dCdb2 = dCdb2 / N
# Arrays update
w1 -= learning_rate * dCdw1
b1 -= learning_rate * dCdb1
w2 -= learning_rate * dCdw2
b2 -= learning_rate * dCdb2
# realizing a new network interaction with new values
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = 1 / (1 + np.exp(-z2)) # output of the output layer
predictions = a2 # the predicted values are the outputs of the output layer
for predictionRow, oneHotRow in zip(
predictions, oneHot
): # take the predicteds values and the true one Hot labels
for index, value in enumerate(predictionRow):
# Verifies if the maximum value of the output layer has the same possition that the one Hot labels
if value == max(predictionRow) and oneHotRow[index] == 1:
# if YES, then the classification is right and we increase by one unit the accuracy
accuracy += 1
# divide the ratio of right answers by the total numbers of data
accuracy = accuracy / N
# Add in the last accuracy inside the train_accuracies list
train_accuracies.append(accuracy)
return w1, b1, w2, b2, train_accuracies
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment