Commit 0687ccef authored by Danjou Pierre

mlp

parent d37eb565
mlp.png 0 → 100644 (new file, 29.6 KiB)

@@ -27,30 +27,17 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
# Compute loss (MSE)
loss = np.mean(np.square(predictions - targets))
print(f'loss: {loss}')
print('shape a1', a1.shape)
print('shape w1', w1.shape)
print('shape b1', b1.shape)
print('shape a2', a2.shape)
print('shape w2', w2.shape)
print('shape b2', b2.shape)
# Backpropagation
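# Chain rule for the MSE loss with sigmoid activations, with shapes
# a0: (N, d_in), a1: (N, d_h), a2: (N, d_out):
#   dL/da2 = 2/N_out * (a2 - targets)
#   dL/dz2 = dL/da2 * sigma'(z2) = dL/da2 * a2 * (1 - a2)
#   dL/dw2 = a1.T @ dL/dz2, and dL/dz2 is also the per-sample gradient for b2
# The same pattern repeats one layer down for w1 and b1.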
delta_a2 = 2 / N_out * (a2 - targets)
print('shape delta_a2', delta_a2.shape)
delta_z2 = delta_a2 * (a2 * (1 - a2))
print('shape delta_z2', delta_z2.shape)
delta_w2 = np.dot(a1.T, delta_z2)
print('shape delta_w2', delta_w2.shape)
delta_b2 = delta_z2
delta_a1 = np.dot(delta_z2, w2.T)
print('shape delta_a1', delta_a1.shape)
delta_z1 = delta_a1 * (a1 * (1 - a1))
print('shape delta_z1', delta_z1.shape)
delta_w1 = np.dot(a0.T, delta_z1)
print('shape delta_w1', delta_w1.shape)
delta_b1 = delta_z1
# Update weights and biases
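# The update step itself falls outside this hunk; a minimal sketch of what plain
# gradient descent would look like here (an assumption, the actual body is not
# shown in this diff). delta_b1/delta_b2 above are per-sample gradients, so they
# are summed over the batch axis before the step.
w1 = w1 - learning_rate * delta_w1
b1 = b1 - learning_rate * np.sum(delta_b1, axis=0, keepdims=True)
w2 = w2 - learning_rate * delta_w2
b2 = b2 - learning_rate * np.sum(delta_b2, axis=0, keepdims=True)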
@@ -88,12 +75,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = softmax_stable(z2) # output of the output layer (softmax activation function)
predictions = a2 # the predicted values are the outputs of the output layer
# print('a0', a0[:2])
# print('w1', w1[:2])
# print('z1', z1[:2])
# print('a1', a1[:2])
# print('z2', z2[:2])
# print('a2', a2[:2])
# Compute loss (cross-entropy loss)
y_true_one_hot = one_hot(labels_train)
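# softmax_stable and one_hot are defined elsewhere in this file and not shown in
# the diff; minimal sketches of what such helpers typically look like (the names
# mirror the calls above, the bodies are assumptions), using the file's numpy import:
def softmax_stable_sketch(z):
    # Subtract the row-wise max before exponentiating so np.exp cannot overflow,
    # then normalise each row to sum to 1.
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def one_hot_sketch(labels, num_classes=10):
    # Map integer labels of shape (N,) to a one-hot matrix of shape (N, num_classes).
    encoded = np.zeros((labels.shape[0], num_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1.0
    return encoded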
@@ -158,7 +140,7 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
d_in = data_train.shape[1]
d_out = 10 # hard-coded here; could also be len(np.unique(labels_train))
# Random initialisation of weights (Xavier initialisation)
#Random initialisation
w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)
b1 = np.zeros((1, d_h))
w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)
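# Note on the scaling: dividing by np.sqrt(d_in) gives the entries of w1 a
# variance of 1/d_in, so each pre-activation in a0 @ w1 keeps roughly the
# variance of the inputs instead of growing by a factor of d_in, which keeps the
# sigmoid away from its saturated region at the start of training. The same
# argument applies to w2 with d_h.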
@@ -172,7 +154,6 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
return train_accuracies, test_accuracy
def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
# Run MLP training
train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
# Plot and save the learning accuracy graph
@@ -184,7 +165,7 @@ def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_r
plt.title('MLP Train Accuracy')
plt.legend()
plt.grid(True)
plt.savefig(r'C:\Users\danjo\Documents\GitHub\image-classification\results')
plt.savefig(r'C:\Users\danjo\Documents\GitHub\image-classification\mlp')
plt.show()
return
@@ -198,7 +179,7 @@ if __name__ == "__main__":
d_in, d_h, d_out = 3072, 64, 10
learning_rate = 0.1
num_epoch = 5
num_epoch = 100
#Initialisation
@@ -209,7 +190,6 @@ if __name__ == "__main__":
#train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
test_mlp(w1, b1, w2, b2, data_test[:50], labels_test[:50])
plot_graph(data_train, labels_train, data_test, labels_test , d_h, learning_rate, num_epoch)
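# The CIFAR loading and the weight initialisation are elided by this diff, so the
# __main__ block above cannot be run from this excerpt alone. A hypothetical dry
# run of run_mlp_training with synthetic stand-in data of the same shapes (not
# the project's actual loader):
rng = np.random.default_rng(0)
data_train = rng.random((500, 3072)).astype(np.float32)
labels_train = rng.integers(0, 10, size=500)
data_test = rng.random((100, 3072)).astype(np.float32)
labels_test = rng.integers(0, 10, size=100)
# positional arguments: d_h=64, learning_rate=0.1, num_epoch=10
train_accuracies, test_accuracy = run_mlp_training(
    data_train, labels_train, data_test, labels_test, 64, 0.1, 10)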