From d9474802a0d703d63eb405e3adc6126ac665de89 Mon Sep 17 00:00:00 2001
From: Chauvin Hugo <hugo.chauvin@etu.ec-lyon.fr>
Date: Fri, 10 Nov 2023 15:14:16 +0000
Subject: [PATCH] Add new file

---
 mlp.py | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)
 create mode 100644 mlp.py

diff --git a/mlp.py b/mlp.py
new file mode 100644
index 0000000..d989155
--- /dev/null
+++ b/mlp.py
@@ -0,0 +1,214 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import pickle
+
+def sigma(x):
+    # Sigmoid activation function
+    return 1 / (1 + np.exp(-x))
+
+def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
+    # Forward pass
+    a0 = data  # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = sigma(z1)  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = sigma(z2)  # output of the output layer (sigmoid activation function)
+    predictions = a2  # the predicted values are the outputs of the output layer
+
+    # Compute loss (MSE)
+    loss = np.mean(np.square(predictions - targets))
+
+    # Backward pass: gradients of the MSE loss (a mean over all the
+    # entries of the output matrix) with respect to w1, b1, w2, b2
+    dCda2 = 2 * (a2 - targets) / targets.size
+    dCdz2 = dCda2 * a2 * (1 - a2)
+    dCdw2 = np.matmul(a1.T, dCdz2)
+    dCdb2 = np.sum(dCdz2, axis=0, keepdims=True)
+    dCdz1 = np.matmul(dCdz2, w2.T) * a1 * (1 - a1)
+    dCdw1 = np.matmul(a0.T, dCdz1)
+    dCdb1 = np.sum(dCdz1, axis=0, keepdims=True)
+
+    # One gradient descent step on the w1, b1, w2, b2 values
+    w1 -= learning_rate * dCdw1
+    b1 -= learning_rate * dCdb1
+    w2 -= learning_rate * dCdw2
+    b2 -= learning_rate * dCdb2
+
+    return w1, b1, w2, b2, loss
+
+def one_hot(labels):
+    # Encode the labels as one-hot row vectors (n_samples x n_classes)
+    labels = np.asarray(labels).reshape(-1)
+    array = np.zeros((len(labels), int(labels.max()) + 1), dtype=int)
+    for i in range(len(labels)):
+        array[i, labels[i]] = 1
+    return array
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
+    # Forward pass
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1
+    a1 = sigma(z1)
+    z2 = np.matmul(a1, w2) + b2
+    a2 = sigma(z2)
+
+    # One-hot encoding of the targets and backward pass
+    Y = one_hot(labels_train)
+    dCdz2 = a2 - Y  # gradient of the cross-entropy loss at the output layer
+    dCdw2 = np.matmul(a1.T, dCdz2)
+    dCdb2 = np.sum(dCdz2, axis=0, keepdims=True)
+    dCdz1 = np.matmul(dCdz2, w2.T) * a1 * (1 - a1)
+    dCdw1 = np.matmul(a0.T, dCdz1)
+    dCdb1 = np.sum(dCdz1, axis=0, keepdims=True)
+
+    # Gradient descent
+    w1 -= learning_rate * dCdw1
+    b1 -= learning_rate * dCdb1
+    w2 -= learning_rate * dCdw2
+    b2 -= learning_rate * dCdb2
+
+    # Loss calculation (binary cross-entropy on each output unit)
+    eps = 1e-12  # avoids log(0)
+    loss = -np.mean(np.sum(Y * np.log(a2 + eps) + (1 - Y) * np.log(1 - a2 + eps), axis=1))
+
+    return w1, b1, w2, b2, loss
+
+def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
+    train_accuracies = np.zeros(num_epoch)
+    for i in range(num_epoch):
+        # One training step over the whole training set
+        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Forward pass to get the network output a2
+        a0 = data_train
+        z1 = np.matmul(a0, w1) + b1
+        a1 = sigma(z1)
+        z2 = np.matmul(a1, w2) + b2
+        a2 = sigma(z2)
+
+        # Accuracy: the predicted class is the most activated output unit
+        predictions = np.argmax(a2, axis=1)
+        train_accuracies[i] = np.mean(predictions == np.asarray(labels_train).reshape(-1))
+
+    return w1, b1, w2, b2, train_accuracies
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test):
+    # Forward pass to get the network output a2
+    a0 = data_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = sigma(z1)
+    z2 = np.matmul(a1, w2) + b2
+    a2 = sigma(z2)
+
+    # Accuracy: the predicted class is the most activated output unit
+    predictions = np.argmax(a2, axis=1)
+    test_accuracy = np.mean(predictions == np.asarray(labels_test).reshape(-1))
+
+    return test_accuracy
+
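+# A minimal sanity check, added for illustration only: on a small random
+# batch of hypothetical sizes, repeated learn_once_mse steps should drive
+# the MSE loss down. The helper name _smoke_test_mse is an addition, not
+# part of the assignment API.
+def _smoke_test_mse():
+    rng = np.random.default_rng(0)
+    data, targets = rng.random((16, 8)), rng.random((16, 4))
+    w1, b1 = 0.1 * rng.standard_normal((8, 6)), np.zeros((1, 6))
+    w2, b2 = 0.1 * rng.standard_normal((6, 4)), np.zeros((1, 4))
+    losses = []
+    for _ in range(100):
+        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.5)
+        losses.append(loss)
+    assert losses[-1] < losses[0], "loss should decrease during training"
+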
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
+    # Layer sizes of the network
+    d_in = data_train.shape[1]
+    d_out = int(np.max(labels_train)) + 1  # one output unit per class
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    # Training on the train set, then evaluation on the test set
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return train_accuracies, test_accuracy
+
+
+##
+# COPY OF READCIFAR.PY AS I WAS UNABLE TO IMPORT IT
+
+def unpickle(file):
+    with open(file, 'rb') as f:
+        batch = pickle.load(f, encoding='bytes')
+    return batch
+
+def read_cifar_batch(batch_path):
+    with open(batch_path, 'rb') as file:
+        # Unpickle the batch
+        batch = pickle.load(file, encoding='bytes')
+
+    # Extract the data and the labels
+    data = np.array(batch[b'data'], dtype=np.float32) / 255.0
+    labels = np.array(batch[b'labels'], dtype=np.int64)
+
+    return data, labels
+
+def read_cifar(batch_dir):
+    data_batches = []
+    label_batches = []
+
+    # Iterate over the batch files
+    for file_name in os.listdir(batch_dir):
+        if file_name.startswith("data_batch") or file_name.startswith("test_batch"):
+            batch_path = os.path.join(batch_dir, file_name)
+            data, labels = read_cifar_batch(batch_path)
+            data_batches.append(data)
+            label_batches.append(labels)
+
+    # Combine the data and the labels from all the batches
+    data = np.concatenate(data_batches, axis=0)
+    labels = np.concatenate(label_batches, axis=0)
+
+    return data, labels
+
+def split_dataset(data, labels, split):
+    # Check that data and labels have the same number of rows
+    if data.shape[0] != labels.shape[0]:
+        raise ValueError("data and labels must have the same number of rows!")
+
+    # Size of the train split
+    train_size = round(data.shape[0] * split)
+
+    # Shuffle the data and the labels with the same random permutation
+    shuffle_index = np.random.permutation(data.shape[0])
+    data = data[shuffle_index]
+    labels = labels[shuffle_index].reshape(-1, 1)
+
+    # Extract the train and test data/labels
+    data_train = data[:train_size]
+    labels_train = labels[:train_size]
+    data_test = data[train_size:]
+    labels_test = labels[train_size:]
+
+    return data_train, labels_train, data_test, labels_test
+
+##
+
+
+if __name__ == "__main__":
+    data_folder = 'C:\\Users\\hugol\\Desktop\\Centrale Lyon\\Centrale Lyon 4A\\Informatique\\Machine Learning\\BE1\\cifar-10-batches-py'
+    data, labels = read_cifar(data_folder)
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.9)
+    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
+
+    plt.figure()
+    plt.plot(range(len(train_accuracies)), train_accuracies)
+    plt.xlabel("Epoch")
+    plt.ylabel("Train accuracy")
+    plt.show()
-- GitLab