diff --git a/mlp.py b/mlp.py
index f0d84e019172cf6ec8d0fb16b58d3808ad805aa0..bfc602bed3a8aeb5ee01b800c58acf0daaff20ff 100644
--- a/mlp.py
+++ b/mlp.py
@@ -1,4 +1,6 @@
 import numpy as np
+from read_cifar import *
+import matplotlib.pyplot as plt
 
 def sigmoid(x):
     return 1 / (1 + np.exp(-x))
@@ -57,7 +59,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
 
     targets_one_hot = one_hot(labels_train) # target as a one-hot encoding for the desired labels
 
-    # cross-entropy loss
+    # Cross-entropy loss
     loss = -np.sum(targets_one_hot * np.log(predictions)) / N
 
     # Backpropagation
@@ -94,18 +96,77 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
         # Find the predicted class
         prediction = np.argmax(predictions, axis = 1)
 
-        # Calculate the accuracy
+        # Calculate the accuracy for the step
         accuracy = np.mean(labels_train == prediction)
-        train_accuracies[i] = accuracy
-
+        train_accuracies[i] = accuracy
 
     return w1, b1, w2, b2, train_accuracies
 
 def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+
+    # Forward pass
+    a0 = data_test # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
+    predictions = a2 # the predicted values are the outputs of the output layer
+
+    # Find the predicted label
+    prediction = np.argmax(predictions, axis = 1)
+
+    # Calculation of the test accuracy
+    test_accuracy = np.mean(prediction == labels_test)
+    return test_accuracy
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
+
+    # Define parameters
+    d_in = data_train.shape[1] # number of input neurons
+    d_out = len(np.unique(labels_train)) # number of output neurons = number of classes
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
+    b1 = np.zeros((1, d_h)) # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
+    b2 = np.zeros((1, d_out)) # second layer biases
+
+    # Training of the MLP classifier for num_epoch epochs
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+
+    # Calculation of the final testing accuracy with the trained weights and biases
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return train_accuracies, test_accuracy
+
+
+if __name__ == "__main__":
+
+    split_factor = 0.9
+    d_h = 64
+    learning_rate = 0.1
+    num_epoch = 100
+
+    data, labels = read_cifar("./data/cifar-10-batches-py")
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
+    epochs = [i for i in range(1, num_epoch + 1)]
+    learning_accuracy = [0] * num_epoch
+
+    for i in range(num_epoch):
+        train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, i + 1)
+        learning_accuracy[i] = test_accuracy
+
+    plt.plot(epochs, learning_accuracy)
+    plt.title("Evolution of learning accuracy across learning epochs")
+    plt.xlabel("number of epochs")
+    plt.ylabel("Accuracy")
+    plt.grid(True, which='both')
+    plt.savefig("results/mlp.png")
+
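
As a quick sanity check of the new entry points, here is a minimal usage sketch (not part of the patch). It assumes the read_cifar module used above exposes read_cifar and split_dataset, and that the CIFAR-10 batches sit under ./data/ as in the __main__ block. It trains the network once and plots the per-epoch training accuracies already returned by train_mlp, alongside the final test accuracy; the output filename is an illustrative assumption.

    # Minimal usage sketch (assumption: run from the repository root,
    # CIFAR-10 batches under ./data/cifar-10-batches-py as in __main__ above).
    import matplotlib.pyplot as plt
    from read_cifar import read_cifar, split_dataset
    from mlp import run_mlp_training

    data, labels = read_cifar("./data/cifar-10-batches-py")
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)

    # One full training run; train_accuracies holds one value per epoch.
    train_accuracies, test_accuracy = run_mlp_training(
        data_train, labels_train, data_test, labels_test,
        d_h=64, learning_rate=0.1, num_epoch=100,
    )

    plt.plot(range(1, len(train_accuracies) + 1), train_accuracies)
    plt.xlabel("epoch")
    plt.ylabel("training accuracy")
    plt.title(f"MLP training accuracy (final test accuracy: {test_accuracy:.3f})")
    plt.grid(True)
    plt.savefig("results/mlp_train_accuracy.png")  # hypothetical output path

This only re-plots what the patch already computes; the __main__ block above instead retrains from scratch for each epoch count to trace test accuracy against the number of epochs.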