diff --git a/mlp.py b/mlp.py
index 1f236a4619e40e5666d923e881a884e49b8e1950..b4bf0a67670835eef98fd4332c0483a34477a421 100644
--- a/mlp.py
+++ b/mlp.py
@@ -2,6 +2,25 @@ import numpy as np
 import pandas as pd
+import matplotlib.pyplot as plt
+
+# read_cifar_batch and split_dataset are used below; the module name is assumed.
+from read_cifar import read_cifar_batch, split_dataset
 
 
+def sigmoid(x):
+    """Sigmoid activation function."""
+    return 1 / (1 + np.exp(-x))
+
+
+def softmax(x):
+    """Softmax activation function, applied row-wise."""
+    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
+
+
+def cross_entropy(y, y_pre):
+    """Cross-entropy loss, averaged over the batch."""
+
+    loss = -np.sum(y * np.log(y_pre))
+    cross_entropy_loss = loss / float(y_pre.shape[0])
+
+    return cross_entropy_loss
+
+
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     """Take the arrays w1,b1,w2,b2 of a 2-layers neural network
@@ -281,3 +300,84 @@ def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch):
         train_accuracies.append(accuracy)
 
     return w1, b1, w2, b2, train_accuracies
+
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    """Test the network on the test set.
+
+    Keyword arguments:
+    w1 -- first layer weights
+    b1 -- first layer biases
+    w2 -- second layer weights
+    b2 -- second layer biases
+    data_test -- test data
+    labels_test -- test data classes
+    """
+
+    a0 = data_test  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax(z2)  # output of the output layer (softmax activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    predict_classes = np.argmax(predictions, axis=1)
+    test_accuracy = (predict_classes == labels_test).mean()
+
+    return test_accuracy
+
+
+def run_mlp_training(
+    data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch
+):
+    """Train an MLP classifier and return its accuracies.
+
+    Keyword arguments:
+    data_train, labels_train, data_test, labels_test -- the training and testing data
+    d_h -- the number of neurons in the hidden layer
+    learning_rate -- the learning rate
+    num_epoch -- the number of training epochs
+
+    Returns the training accuracies across epochs as a list of floats and the
+    final testing accuracy as a float.
+    """
+
+    d_in = data_train.shape[1]  # input size: number of features per image
+    d_out = int(np.max(labels_train)) + 1  # output size: number of classes (10 for CIFAR-10)
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    (w1, b1, w2, b2, training_accuracies) = train_mlp(
+        w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
+    )
+    final_testing_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return training_accuracies, final_testing_accuracy
+
+
+def learning_accuracy_evolution(
+    split_factor=0.9, d_h=64, learning_rate=0.1, num_epoch=100
+):
+    """Plot the evolution of training accuracy across learning epochs.
+
+    Keyword arguments:
+    split_factor -- split factor of the data
+    d_h -- the number of neurons in the hidden layer
+    learning_rate -- the learning rate
+    num_epoch -- the number of training epochs
+    """
+
+    (data, labels) = read_cifar_batch(
+        "data/cifar-10-batches-py/data_batch_1"
+    )  # test it with only one batch
+    (data_train, labels_train, data_test, labels_test) = split_dataset(
+        data, labels, split_factor
+    )  # split_factor defaults to 0.9
+
+    d_in = data_train.shape[1]  # input size: number of features per image
+    d_out = int(np.max(labels_train)) + 1  # output size: number of classes
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    (w1, b1, w2, b2, train_accuracies) = train_mlp(
+        w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
+    )
+    learning_epochs = list(range(1, num_epoch + 1))
+
+    plt.plot(learning_epochs, train_accuracies)
+    plt.savefig("results/mlp.png")
+    plt.show()