From f1d0d979267f46c62717a5d08a05947fd70f12a7 Mon Sep 17 00:00:00 2001
From: Malo Bourry <malo.bourry@ecl20.ec-lyon.fr>
Date: Mon, 23 Oct 2023 19:26:28 +0200
Subject: [PATCH] end of Neural Network mlp

---
 final_run     |  10 +++
 knn.py        |   2 +-
 mlp.py        | 172 +++++++++++++++++++++++++++++++++++++++++++++++++-
 read_cifar.py |   1 -
 4 files changed, 180 insertions(+), 5 deletions(-)
 create mode 100644 final_run

diff --git a/final_run b/final_run
new file mode 100644
index 0000000..46dc756
--- /dev/null
+++ b/final_run
@@ -0,0 +1,10 @@
+from read_cifar import read_cifar, split_dataset
+from mlp import run_mlp_training
+import matplotlib.pyplot as plt
+
+if __name__ == "__main__":
+    data, labels = read_cifar()
+    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
+    list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
+    plt.plot(list_accuracies)
+    plt.savefig("accuracies_mlp_network")
diff --git a/knn.py b/knn.py
index b39e134..25870cf 100644
--- a/knn.py
+++ b/knn.py
@@ -12,7 +12,7 @@ def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
     return dists
 
 def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k:int):
-    labels_predicts = np.zeros(np.size(dist, 0))
+    labels_predicts = np.zeros(np.size(dists, 0))
     for i in range(np.size(labels_predicts, 0)):
         #On extrait les indices des k valeurs plus petites (des k plus proches voisins)
         k_neighbors_index = np.argmin(dists[i, :], np.sort(dists[i, :])[:k])
diff --git a/mlp.py b/mlp.py
index 92a28be..f37afc7 100644
--- a/mlp.py
+++ b/mlp.py
@@ -2,7 +2,16 @@ import numpy as np
 import math
 
 def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float):
-
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data: input vector of the network.
+    :targets: output vector to reach.
+    :learning_rate: factor for the gradient descent learning (speed of the descent).
+    :return: updated weights and biases of the network after 1 loop of gradient descent, and the loss value.
+    """
     # Forward pass
     N = np.size(data, 0)
     a0 = data # the data are the input of the first layer
@@ -14,7 +23,6 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
 
     # Compute loss (MSE)
     loss = np.mean((predictions - targets)**2)
-    print(loss)
 
     #Compute gradient dW
     da2 = 2/N*(a2-targets)
@@ -22,7 +30,7 @@
     dw2 = dz2*a1
     db2 = dz2
     da1 = dz2*np.sum(w2, axis=1)
-    dz1 = da1*a1*(1*a1)
+    dz1 = da1*a1*(1-a1)
     dw1 = dz1*a0
     db1 = dz1
 
@@ -33,3 +41,161 @@
 
     return w1, b1, w2, b2, loss
 
+
+
+def one_hot(label: np.ndarray):
+    """
+    One-hot encode a sequence of integer labels.
+
+    :label: the sequence of integers to encode.
+    :return: the encoded matrix, one row per label and one column per class.
+    """
+    num_classes = int(np.max(label)) + 1
+    result = np.zeros((np.size(label, 0), num_classes))
+    for i in range(np.size(label, 0)):
+        result[i, label[i]] = 1
+    return result
+
+def convert_integer_to_binary(integer, size):
+    """
+    Convert an integer into a binary vector with a specified size.
+
+    :integer: Integer to convert to binary.
+    :size: Size of the binary vector.
+    :return: The converted binary vector.
+    """
+    binary = []
+    while integer > 0:
+        binary.insert(0, integer % 2)
+        integer //= 2
+
+    # Fill with zeros on the left if necessary to reach the specified size
+    while len(binary) < size:
+        binary.insert(0, 0)
+
+    return np.array(binary)
+
+
+def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data: input vector of the network.
+    :labels_train: output vector for the training of the network.
+    :learning_rate: factor for the gradient descent learning (speed of the descent).
+    :return: updated weights and biases of the network after 1 loop of gradient descent, and the loss value.
+    """
+
+    # Forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
+
+    # Backward pass: gradients of the cross-entropy loss with respect to weights and biases
+    m = np.size(data, 0)
+    encoded_vector = one_hot(labels_train)
+    dz2 = a2 - encoded_vector
+    dw2 = np.matmul(a1.T, dz2) / m
+    db2 = np.mean(dz2, axis=0, keepdims=True)
+    da1 = np.matmul(dz2, w2.T)
+    dz1 = da1*a1*(1-a1)
+    dw1 = np.matmul(a0.T, dz1) / m
+    db1 = np.mean(dz1, axis=0, keepdims=True)
+
+    w1 -= learning_rate*dw1
+    w2 -= learning_rate*dw2
+    b1 -= learning_rate*db1
+    b2 -= learning_rate*db2
+
+    loss = (-1/m) * np.sum(encoded_vector * np.log(a2) + (1 - encoded_vector) * np.log(1 - a2))
+
+    return w1, b1, w2, b2, loss
+
+def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_train: np.ndarray, labels_train: np.ndarray, learning_rate: float, num_epoch: int):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data_train: input training vector.
+    :labels_train: output training vector.
+    :learning_rate: factor for the gradient descent learning (speed of the descent).
+    :num_epoch: number of training loops (gradient descent).
+    :return: updated weights and biases of the network after num_epoch loops of gradient descent, and the training accuracy at each loop.
+    """
+    c = 0
+    accuracies = []
+    while c < num_epoch:
+        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+        c += 1
+
+        # Forward pass to measure the training accuracy after this epoch
+        a0 = data_train
+        z1 = np.matmul(a0, w1) + b1
+        a1 = 1 / (1 + np.exp(-z1))
+        z2 = np.matmul(a1, w2) + b2
+        a2 = 1 / (1 + np.exp(-z2))
+        accuracies.append(compute_accuracy(a2, labels_train))
+    return w1, b1, w2, b2, accuracies
+
+def compute_accuracy(y_predict, y_target):
+    """
+    :y_predict: matrix of predicted scores, one row per sample.
+    :y_target: vector of target integer labels.
+    :return: the fraction of samples whose highest-scoring class matches the target label.
+    """
+    true = 0
+    for i in range(np.size(y_predict, 0)):
+        if np.argmax(y_predict[i]) == y_target[i]:
+            true += 1
+    return true/np.size(y_predict, 0)
+
+def test_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
+    """
+    :w1: weights of the first layer of the network.
+    :b1: bias of the first layer of the network.
+    :w2: weights of the second layer of the network.
+    :b2: bias of the second layer of the network.
+    :data_test: input testing vector.
+    :labels_test: output testing vector.
+    :return: the accuracy of the test.
+    """
+    # Forward pass on the test set (the weights are not updated here)
+    a0 = data_test
+    z1 = np.matmul(a0, w1) + b1
+    a1 = 1 / (1 + np.exp(-z1))
+    z2 = np.matmul(a1, w2) + b2
+    y_predict = 1 / (1 + np.exp(-z2))
+    test_accuracy = compute_accuracy(y_predict, labels_test)
+
+    return test_accuracy
+
+def run_mlp_training(data_train: np.ndarray, labels_train: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray, d_h: int, learning_rate: float, num_epoch: int):
+    """
+    :data_train: input training vector.
+    :labels_train: output training vector.
+    :data_test: input testing vector.
+    :labels_test: output testing vector.
+    :d_h: number of neurons on the hidden layer.
+    :learning_rate: factor for the gradient descent learning (speed of the descent).
+    :num_epoch: number of training loops (gradient descent).
+    :return: the training accuracies across epochs as a list of floats and the final testing accuracy as a float.
+    """
+    # Number of neurons on the first and the last layer.
+    d_in = np.size(data_train, 1)
+    d_out = int(np.max(labels_train)) + 1  # one output neuron per class
+
+    # Random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
+    b1 = np.zeros((1, d_h))  # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
+    b2 = np.zeros((1, d_out))  # second layer biases
+
+    w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+
+    return list_accuracies, final_accuracy
diff --git a/read_cifar.py b/read_cifar.py
index df7393f..8e56b87 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -51,4 +51,3 @@ def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):
 if __name__ == "__main__":
     data, labels = read_cifar()
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)
-    print(1)
-- 
GitLab
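Below is a minimal smoke test one could run before launching the full CIFAR-10 job in final_run, to check that run_mlp_training returns one training accuracy per epoch plus a final test accuracy. It is a sketch, not part of the patch: the file name smoke_test.py, the synthetic data shapes (20/10 samples, 8 features, 3 classes) and the hyperparameters are assumptions chosen for illustration; only run_mlp_training and its parameters come from mlp.py.

    # smoke_test.py -- hypothetical sanity check, not included in the patch
    import numpy as np
    from mlp import run_mlp_training

    # Tiny synthetic dataset (arbitrary sizes); every class appears in the labels
    data_train = np.random.rand(20, 8)
    labels_train = np.arange(20) % 3
    data_test = np.random.rand(10, 8)
    labels_test = np.arange(10) % 3

    list_accuracies, final_accuracy = run_mlp_training(
        data_train, labels_train, data_test, labels_test,
        d_h=16, learning_rate=0.1, num_epoch=5)

    print(len(list_accuracies))  # expected: 5, one training accuracy per epoch
    print(final_accuracy)        # a single float between 0 and 1

If the shapes line up here, the call in final_run should only differ in the data source, the hidden-layer size, and the number of epochs.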