diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c85fa722b35526276c3dc1dc922cd921507f5ac
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,180 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from read_cifar import read_cifar, split_dataset
+
+N = 30  # number of input data
+d_in = 3  # input dimension
+d_h = 3  # number of neurons in the hidden layer
+d_out = 2  # output dimension (number of neurons of the output layer)
+
+# Random initialization of the network weights and biases
+w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+b1 = np.zeros((1, d_h))  # first layer biases
+w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+b2 = np.zeros((1, d_out))  # second layer biases
+
+data = np.random.rand(N, d_in)  # random input data
+targets = np.random.rand(N, d_out)  # random targets
+
+def sigmoid(x):
+    # Numerically stable sigmoid: clip the input to avoid overflow in np.exp
+    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+    n_samples = data.shape[0]
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute the loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+
+    # Backpropagation
+    dC_da2 = 2 * (predictions - targets) / n_samples
+    dC_dz2 = dC_da2 * a2 * (1 - a2)
+    dC_dw2 = np.matmul(a1.T, dC_dz2)
+    dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
+
+    dC_da1 = np.matmul(dC_dz2, w2.T)
+    dC_dz1 = dC_da1 * a1 * (1 - a1)
+    dC_dw1 = np.matmul(a0.T, dC_dz1)
+    dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w2 = w2 - learning_rate * dC_dw2
+    b2 = b2 - learning_rate * dC_db2
+    w1 = w1 - learning_rate * dC_dw1
+    b1 = b1 - learning_rate * dC_db1
+
+    return w1, b1, w2, b2, loss
+
+# Convert a vector of integer labels into a one-hot matrix
+def one_hot(labels, num_classes=None):
+    labels = np.asarray(labels, dtype=int)
+    n_samples = len(labels)
+    if num_classes is None:
+        num_classes = int(labels.max()) + 1
+    one_hot_matrix = np.zeros((n_samples, num_classes))
+    one_hot_matrix[np.arange(n_samples), labels] = 1
+    return one_hot_matrix
+
+def softmax(z):
+    # Row-wise softmax, shifted by the row maximum for numerical stability
+    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
+    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    n_samples = data.shape[0]
+
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = softmax(z2)  # softmax activation on the output layer
+    predict = a2  # the predicted class probabilities
+
+    targets = one_hot(labels_train, num_classes=w2.shape[1])
+
+    # Compute the loss (cross-entropy averaged over the batch)
+    loss = -np.mean(np.sum(targets * np.log(predict + 1e-12), axis=1))
+
+    # Backpropagation (with softmax + cross-entropy, dC/dz2 simplifies to predict - targets)
+    dC_dz2 = (predict - targets) / n_samples
+    dC_dw2 = np.dot(a1.T, dC_dz2)
+    dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True)
+
+    dC_da1 = np.dot(dC_dz2, w2.T)
+    dC_dz1 = dC_da1 * a1 * (1 - a1)
+    dC_dw1 = np.dot(a0.T, dC_dz1)
+    dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True)
+
+    # Update weights and biases
+    w2 = w2 - learning_rate * dC_dw2
+    b2 = b2 - learning_rate * dC_db2
+    w1 = w1 - learning_rate * dC_dw1
+    b1 = b1 - learning_rate * dC_db1
+
+    return w1, b1, w2, b2, loss
+
+def accuracy(labels, predictions):
+    return np.mean(labels == predictions)
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
+    train_accuracies = []
+    num_epoch = int(num_epoch)  # ensure num_epoch is an integer
+
+    for epoch in range(num_epoch):
+        # One gradient-descent step on the training set
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Forward propagation with the updated weights
+        z1 = np.dot(data_train, w1) + b1
+        a1 = sigmoid(z1)
+        z2 = np.dot(a1, w2) + b2
+        a2 = softmax(z2)
+        predicted_labels = np.argmax(a2, axis=1)  # the predicted labels
+
+        # Compute the accuracy on the training set
+        train_accuracy = accuracy(labels_train, predicted_labels)
+        train_accuracies.append(train_accuracy)
+
+    return w1, b1, w2, b2, train_accuracies
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    # Forward propagation
+    a0 = data_test
+    z1 = np.dot(a0, w1) + b1
+    a1 = sigmoid(z1)
+    z2 = np.dot(a1, w2) + b2
+    a2 = softmax(z2)
+
+    predicted_labels_test = np.argmax(a2, axis=1)
+    test_accuracy = np.mean(predicted_labels_test == labels_test)
+
+    return test_accuracy
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
+    d_in = data_train.shape[1]
+    d_out = len(np.unique(labels_train))
+
+    # Initialize weights (scaled to avoid saturating the sigmoid) and biases
+    w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)
+    b1 = np.zeros((1, d_h))
+    w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)
+    b2 = np.zeros((1, d_out))
+
+    # Train the MLP
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
+
+    # Test the MLP
+    final_test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return train_accuracies, final_test_accuracy
+
+if __name__ == "__main__":
+
+    # Create train and test datasets
+    data, labels = read_cifar(r"C:\Users\etulyon1\OneDrive\Desktop\ECL\Apprentissage profond & Intelligence Artificielle\BE1\image-classification\data\cifar-10-batches-py")
+    split_factor = 0.9
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
+
+    # Define the network hyper-parameters and train it
+    d_h = 64
+    learning_rate = 0.1
+    num_epoch = 100
+
+    train_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
+    print("The accuracy of the network on the test dataset is " + str(100 * final_accuracy) + "%")
+
+    # Plot the evolution of the training accuracy over the epochs
+    plt.plot(train_accuracies)
+    plt.xlabel("Training epoch")
+    plt.ylabel("Training accuracy")
+    plt.title("Accuracy of the neural network over the training epochs")
+    plt.show()