Commit f1d0d979 authored by Bourry Malo

End of neural network MLP

parent 46e8b4cc
from read_cifar import read_cifar, split_dataset
from mlp import run_mlp_training
import matplotlib.pyplot as plt
if __name__ == "__main__":
    data, labels = read_cifar()
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
    list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
    plt.plot(list_accuracies)
    plt.savefig("accuracies_mlp_network")
@@ -12,7 +12,7 @@ def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
    return dists

def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k: int):
    labels_predicts = np.zeros(np.size(dists, 0))
    for i in range(np.size(labels_predicts, 0)):
        # Extract the indices of the k smallest distances (the k nearest neighbours)
        k_neighbors_index = np.argsort(dists[i, :])[:k]
...
@@ -2,7 +2,16 @@ import numpy as np
import math

def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float):
"""
:w1: weights of the first layer of the network.
:b1: bias of the first layer of the network.
:w2: weights of the second layer of the network.
:b2: bias of the second layer of the network.
:data: input vector of the network.
:targets: output vector to reach.
:learning_rate: factor for the gradient descent learning (quickness of the descent).
:return: updated weights and biases of the network after 1 loop of gradient descent.
"""
    # Forward pass
    N = np.size(data, 0)
    a0 = data  # the data are the input of the first layer
@@ -14,7 +23,6 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    # Compute loss (MSE)
    loss = np.mean((predictions - targets)**2)

    # Compute the gradients
    da2 = 2 / N * (a2 - targets)
@@ -22,7 +30,76 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    dw2 = np.matmul(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = np.matmul(dz2, w2.T)
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.matmul(a0.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # Gradient descent update of the weights and biases
    w1 -= learning_rate * dw1
    w2 -= learning_rate * dw2
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2

    return w1, b1, w2, b2, loss
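As a rough usage sketch of learn_once_mse (the shapes, seed, and sizes below are illustrative assumptions, not values taken from this repository), a single MSE gradient descent step on a tiny random batch could look like:

rng = np.random.default_rng(0)
data = rng.random((8, 32))      # 8 samples, 32 input features (hypothetical)
targets = rng.random((8, 10))   # 8 samples, 10 target values (hypothetical)
w1, b1 = 2 * rng.random((32, 16)) - 1, np.zeros((1, 16))
w2, b2 = 2 * rng.random((16, 10)) - 1, np.zeros((1, 10))
w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate=0.1)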
def one_hot(label: np.ndarray):
    """
    One-hot encode a sequence of integer labels.
    :label: the sequence of integer labels to encode.
    :return: the encoded matrix, one row per label.
    """
    num_classes = int(np.max(label)) + 1
    result = np.zeros((np.size(label, 0), num_classes))
    result[np.arange(np.size(label, 0)), label] = 1
    return result
def convert_integer_to_binary(integer, size):
    """
    Convert an integer into a binary vector of a specified size.
    :integer: integer to convert to binary.
    :size: size of the returned binary vector.
    :return: the converted binary vector.
    """
    binary = []
    while integer > 0:
        binary.insert(0, integer % 2)
        integer //= 2
    # Fill with zeros on the left if necessary to reach the specified size
    while len(binary) < size:
        binary.insert(0, 0)
    return np.array(binary)
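As a quick illustration of the two encoders above (the inputs are arbitrary example values):

one_hot(np.array([0, 2, 1]))       # -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
convert_integer_to_binary(5, 4)    # -> [0, 1, 0, 1]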
def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data: input vector of the network.
    :labels_train: integer class labels used to train the network.
    :learning_rate: learning rate (step size) of the gradient descent.
    :return: updated weights and biases of the network after one gradient descent step, and the loss value.
    """
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
    # Backward pass
    encoded_vector = one_hot(labels_train)
    dz2 = a2 - encoded_vector
    dw2 = np.matmul(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = np.matmul(dz2, w2.T)
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.matmul(a0.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
@@ -31,5 +108,94 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2

    m = np.size(data, 0)
    loss = (-1 / m) * np.sum(encoded_vector * np.log(a2) + (1 - encoded_vector) * np.log(1 - a2))

    return w1, b1, w2, b2, loss
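A note on the dz2 = a2 - encoded_vector shortcut above: with a sigmoid output a = 1/(1 + exp(-z)) and binary cross-entropy loss, the derivative of the loss with respect to z simplifies to a - y. The lines below are a small, self-contained finite-difference check of that identity (the values of z, y, and eps are arbitrary):

z, y, eps = 0.3, 1.0, 1e-6
bce = lambda v: -(y * np.log(1 / (1 + np.exp(-v))) + (1 - y) * np.log(1 - 1 / (1 + np.exp(-v))))
numeric = (bce(z + eps) - bce(z - eps)) / (2 * eps)  # numerical derivative of the loss
analytic = 1 / (1 + np.exp(-z)) - y                  # the a - y shortcut
# numeric and analytic agree up to rounding error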
def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_train: np.ndarray, labels_train: np.ndarray, learning_rate: float, num_epoch: int):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data_train: input training vector.
    :labels_train: output training vector.
    :learning_rate: learning rate (step size) of the gradient descent.
    :num_epoch: number of training loops (gradient descent steps).
    :return: updated weights and biases of the network after num_epoch gradient descent steps, and the training accuracy at each epoch.
    """
    accuracies = []
    for _ in range(num_epoch):
        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)

        # Forward pass with the updated weights to measure the training accuracy
        a0 = data_train
        z1 = np.matmul(a0, w1) + b1
        a1 = 1 / (1 + np.exp(-z1))
        z2 = np.matmul(a1, w2) + b2
        a2 = 1 / (1 + np.exp(-z2))
        accuracies.append(compute_accuracy(np.argmax(a2, axis=1), labels_train))

    return w1, b1, w2, b2, accuracies
def compute_accuracy(y_predict, y_target):
    true = 0
    for i in range(np.size(y_predict, 0)):
        if y_predict[i] == y_target[i]:
            true += 1
    return true / np.size(y_predict, 0)
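compute_accuracy compares already-decoded class predictions against the target labels, so callers take the argmax of the network output first; a tiny illustrative call (arbitrary values):

compute_accuracy(np.array([1, 0, 2]), np.array([1, 2, 2]))  # -> 2/3, two of three predictions match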
def test_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data_test: input testing vector.
    :labels_test: output testing vector.
    :return: the accuracy of the test.
    """
    # Forward pass with the trained weights (no further training on the test set)
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = 1 / (1 + np.exp(-z1))
    z2 = np.matmul(a1, w2) + b2
    y_predict = 1 / (1 + np.exp(-z2))

    test_accuracy = compute_accuracy(np.argmax(y_predict, axis=1), labels_test)
    return test_accuracy
def run_mlp_training(data_train: np.ndarray, labels_train: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray, d_h: int, learning_rate: float, num_epoch: int):
    """
    :data_train: input training vector.
    :labels_train: output training vector.
    :data_test: input testing vector.
    :labels_test: output testing vector.
    :d_h: number of neurons on the hidden layer.
    :learning_rate: learning rate (step size) of the gradient descent.
    :num_epoch: number of training loops (gradient descent steps).
    :return: the training accuracies across epochs as a list of floats and the final testing accuracy as a float.
    """
    # Number of neurons on the input, hidden and output layers
    d_in = np.size(data_train, 1)
    d_out = int(np.max(labels_train)) + 1  # number of classes

    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases

    w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return list_accuracies, final_accuracy
@@ -51,4 +51,3 @@ def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):

if __name__ == "__main__":
    data, labels = read_cifar()
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)