final commit

f9861327 · Muniz Silva Samuel · 8f193998 · f9861327
Commit f9861327 authored 2 years ago by Muniz Silva Samuel
--- a/mlp.py
+++ b/mlp.py
@@ -191,36 +191,93 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):


 def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch):
+    """the function train_mlp taking as parameters:
+    w1, b1, w2 and b2 the weights and biases of the network,
+    data_train a matrix of shape (batch_size x d_in),
+    labels_train a vector of size batch_size,
+    learning_rate the learning rate, and
+    num_epoch the number of training epoch,
+    that perform num_epoch of training steps and returns:
+    w1, b1, w2 and b2 the updated weights and biases of the network,
+    train_accuracies the list of train accuracies across epochs as a list of floats."""
+
+    (
+        d_in,
+        d_h,
+    ) = w1.shape  # extracts the dimensions of the variables to define future np.arrays
+    N = labels_train.shape[0]
+    d_out = b2.shape[0]
+    oneHot = one_hot(labels_train)
+    train_accuracies = []  # create the list of accuracy

-    train_accuracies = []
    for i in range(num_epoch):
-        w1, b1, w2, b2, loss = learn_once_cross_entropy(
-            w1, b1, w2, b2, data, labels_train, learning_rate
-        )
-        train_accuracies.append(loss)
-    return w1, b1, w2, b2, train_accuracies

+        # Create the gradient for the variables w2,b2,w1,b1
+        dCdw2 = np.zeros((d_h, d_out))
+        dCdb2 = np.zeros(d_out)
+        dCdw1 = np.zeros((d_in, d_h))
+        dCdb1 = np.zeros(d_h)

-################################################################################
-# RANDOM TEST
-######################################################################################
-N = 30  # number of input data
-d_in = 3  # input dimension
-d_h = 5  # number of neurons in the hidden layer
-d_out = 10  # output dimension (number of neurons of the output layer)
-
-# Random initialization of the network weights and biaises
-w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
-b1 = np.zeros(d_h)  # first layer biaises
-w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
-b2 = np.zeros(d_out)  # second layer biaises
-
-data = np.random.rand(N, d_in)  # create a random data
-# targets = np.random.rand(N, d_out)  # create a random targets
-labels_train = np.random.randint(3, size=N)  # create a random targets
-# print(w1, b1, w2, b2)
-# print('NEW VALUES')
-w1, b1, w2, b2, loss = learn_once_cross_entropy(
-    w1, b1, w2, b2, data, labels_train, 0.01
-)
-# print(w1, b1, w2, b2, loss)
+        # take each data with its respective labels
+        for dataRow, oneHotRow in zip(data, oneHot):
+            a0 = dataRow  # the data are the input of the first layer
+            z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+            a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+            z2 = np.matmul(a1, w2) + b2  # input of the output layer
+            a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
+            # the predicted values are the outputs of the output layer
+            dCdz2 = a2 - oneHotRow
+
+            # sum the contribution of each data for the w2  updating
+            for r in range(d_h):
+                for c in range(d_out):
+                    dCdw2[r][c] += dCdz2[c] * a1[r]
+
+            # sum the contribution of each data for the b2  updating
+            for r in range(d_out):
+                dCdb2[r] += dCdz2[r]
+
+            # sum the contribution of each data for the w1  updating
+            for r in range(d_in):
+                for c in range(d_h):
+                    for j in range(d_out):
+                        dCdw1[r][c] += dCdz2[j] * w2[c][j] * a1[c] * (1 - a1[c]) * a0[r]
+
+            # sum the contribution of each data for the b1  updating
+            for r in range(d_h):
+                for j in range(d_out):
+                    dCdb1[r] += dCdz2[j] * w2[r][j] * a1[r] * (1 - a1[r])
+
+        # Average value of each data contribution
+        dCdw1 = dCdw1 / N
+        dCdb1 = dCdb1 / N
+        dCdw2 = dCdw2 / N
+        dCdb2 = dCdb2 / N
+
+        # Arrays update
+        w1 -= learning_rate * dCdw1
+        b1 -= learning_rate * dCdb1
+        w2 -= learning_rate * dCdw2
+        b2 -= learning_rate * dCdb2
+
+        # realizing a new network interaction with new values
+        a0 = data  # the data are the input of the first layer
+        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+        a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer
+        z2 = np.matmul(a1, w2) + b2  # input of the output layer
+        a2 = 1 / (1 + np.exp(-z2))  # output of the output layer
+        predictions = a2  # the predicted values are the outputs of the output layer
+        for predictionRow, oneHotRow in zip(
+            predictions, oneHot
+        ):  # take the predicteds values and the true one Hot labels
+            for index, value in enumerate(predictionRow):
+                # Verifies if the maximum value of the output layer has the same possition that the one Hot labels
+                if value == max(predictionRow) and oneHotRow[index] == 1:
+                    # if YES, then the classification is right and we increase by one unit the accuracy
+                    accuracy += 1
+        # divide the ratio of right answers by the total numbers of data
+        accuracy = accuracy / N
+        # Add in the last accuracy inside the train_accuracies list
+        train_accuracies.append(accuracy)
+
+    return w1, b1, w2, b2, train_accuracies