diff --git a/mlp.png b/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..83352343a69a7879686c6ac19c0816b071697b1d
Binary files /dev/null and b/mlp.png differ
diff --git a/mlp.py b/mlp.py
index 17fa80c327a0bf804a6f6b91faafbe2476af2a1b..307815037f66688664406eabe3f77a0bec897768 100644
--- a/mlp.py
+++ b/mlp.py
@@ -27,30 +27,17 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
 
     # Compute loss (MSE)
     loss = np.mean(np.square(predictions - targets))
-    print(f'loss: {loss}')
-    print('shape a1', a1.shape)
-    print('shape w1', w1.shape)
-    print('shape b1', b1.shape)
-
-    print('shape a2', a2.shape)
-    print('shape w2', w2.shape)
-    print('shape b2', b2.shape)
+
     # Backpropagation
     delta_a2 = 2 / N_out * (a2 - targets)
-    print('shape delta_a2', delta_a2.shape)
     delta_z2 = delta_a2 * (a2 * (1 - a2))
-    print('shape delta_z2', delta_z2.shape)
     delta_w2 = np.dot(a1.T, delta_z2)
-    print('shape delta_w2', delta_w2.shape)
     delta_b2 = delta_z2
     delta_a1 = np.dot(delta_z2, w2.T)
-    print('shape delta_a1', delta_a1.shape)
     delta_z1 = delta_a1 * (a1 * (1- a1))
-    print('shape delta_z1', delta_z1.shape)
     delta_w1 = np.dot(a0.T, delta_z1)
-    print('shape delta_w1', delta_w2.shape)
     delta_b1 = delta_z1
 
     # Update weights and biases
@@ -88,12 +75,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
     a2 = softmax_stable(z2)  # output of the output layer (sigmoid activation function)
     predictions = a2  # the predicted values are the outputs of the output layer
-    # print('a0', a0[:2])
-    # print('w1', w1[:2])
-    # print('z1', z1[:2])
-    # print('a1', a1[:2])
-    # print('z2', z2[:2])
-    # print('a2', a2[:2])
+
 
     # Compute loss (cross-entropy loss)
     y_true_one_hot = one_hot(labels_train)
@@ -158,7 +140,7 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
     d_in = data_train.shape[1]
     d_out = 10 #we can hard code it here or len(np.unique(label_train))
 
-    #Random initialisation of weights Xavier initialisation
+    #Random initialisation
     w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)
     b1 = np.zeros((1, d_h))
     w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)
@@ -172,7 +154,6 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
     return train_accuracies, test_accuracy
 
 def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
-
     # Run MLP training
     train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
 
     # Plot and save the learning accuracy graph
@@ -184,7 +165,7 @@ def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_r
     plt.title('MLP Train Accuracy')
     plt.legend()
     plt.grid(True)
-    plt.savefig(r'C:\Users\danjo\Documents\GitHub\image-classification\results')
+    plt.savefig(r'C:\Users\danjo\Documents\GitHub\image-classification\mlp')
     plt.show()
     return()
@@ -198,7 +179,7 @@ if __name__ == "__main__":
 
     d_in, d_h, d_out = 3072, 64, 10
     learning_rate = 0.1
-    num_epoch = 5
+    num_epoch = 100
 
     #Initialisation
@@ -209,7 +190,6 @@ if __name__ == "__main__":
 
     #train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
-
     test_mlp(w1, b1, w2, b2, data_test[:50], labels_test[:50])
     plot_graph(data_train, labels_train, data_test, labels_test , d_h, learning_rate, num_epoch)
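
For context on the backpropagation code these hunks clean up, here is a minimal, self-contained sketch of the MSE learning step that `learn_once_mse` retains once the shape-debugging prints are removed. The data, batch size, and dimensions below are synthetic placeholders, not values from the repository. One deliberate deviation: the bias gradients are summed over the batch axis here, because the patched code's `delta_b2 = delta_z2` leaves an `(N, d_out)` array that silently broadcasts `b2` up to batch size on update.

```python
import numpy as np

# Synthetic stand-ins for a CIFAR batch (shapes are illustrative only)
rng = np.random.default_rng(0)
N, d_in, d_h, d_out = 8, 3072, 64, 10

data = rng.random((N, d_in))
targets = rng.random((N, d_out))
w1 = rng.standard_normal((d_in, d_h)) / np.sqrt(d_in)
b1 = np.zeros((1, d_h))
w2 = rng.standard_normal((d_h, d_out)) / np.sqrt(d_h)
b2 = np.zeros((1, d_out))
learning_rate = 0.1

# Forward pass: two layers, sigmoid activation on each
a0 = data
z1 = a0 @ w1 + b1
a1 = 1.0 / (1.0 + np.exp(-z1))
z2 = a1 @ w2 + b2
a2 = 1.0 / (1.0 + np.exp(-z2))
loss = np.mean(np.square(a2 - targets))

# Backward pass: the same delta chain as the patched function
delta_a2 = 2 / d_out * (a2 - targets)           # matches 2 / N_out in the diff
delta_z2 = delta_a2 * (a2 * (1 - a2))           # sigmoid'(z2) = a2 * (1 - a2)
delta_w2 = np.dot(a1.T, delta_z2)
delta_b2 = delta_z2.sum(axis=0, keepdims=True)  # summed over the batch (deviation)
delta_a1 = np.dot(delta_z2, w2.T)
delta_z1 = delta_a1 * (a1 * (1 - a1))
delta_w1 = np.dot(a0.T, delta_z1)
delta_b1 = delta_z1.sum(axis=0, keepdims=True)  # summed over the batch (deviation)

# Gradient step
w1 -= learning_rate * delta_w1
b1 -= learning_rate * delta_b1
w2 -= learning_rate * delta_w2
b2 -= learning_rate * delta_b2
print(f'loss: {loss:.4f}')
```

The diff's removal of the print statements does not change this computation; it only strips the debugging output, and the removed `print('shape delta_w1', delta_w2.shape)` line incidentally carried a copy-paste slip (printing `delta_w2.shape` under the `delta_w1` label) that disappears with it.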