import numpy as np import math def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float): """ :w1: weights of the first layer of the network. :b1: bias of the first layer of the network. :w2: weights of the second layer of the network. :b2: bias of the second layer of the network. :data: input vector of the network. :targets: output vector to reach. :learning_rate: factor for the gradient descent learning (quickness of the descent). :return: updated weights and biases of the network after 1 loop of gradient descent. """ # Forward pass N = np.size(data, 0) a0 = data # the data are the input of the first layer z1 = np.matmul(a0, w1) + b1 # input of the hidden layer a1 = 1 / (1 + math.exp(-z1)) # output of the hidden layer (sigmoid activation function) z2 = np.matmul(a1, w2) + b2 # input of the output layer a2 = 1 / (1 + math.exp(-z2)) # output of the output layer (sigmoid activation function) predictions = a2 # the predicted values are the outputs of the output layer # Compute loss (MSE) loss = np.mean((predictions - targets)**2) #Compute gradient dW da2 = 2/N*(a2-targets) dz2 = da2*a2*(1-a2) dw2 = dz2*a1 db2 = dz2 da1 = dz2*np.sum(w2, axis=1) dz1 = da1*a1*(1-a1) dw1 = dz1*a0 db1 = dz1 w1 -= learning_rate*dw1 w2 -= learning_rate*dw2 b1 -= learning_rate*db1 b2 -= learning_rate*db2 return w1, b1, w2, b2, loss def one_hot(label=np.ndarray): """ Encode une suite d'entier en binaire : encodeur one-hot. :label: La suite d'entier à encoder. :return: la matrice encodée. """ result = np.zeros((np.size(label, 0), np.size(label, 0))) for i in range(np.size(label, 0)): result[i] = convert_integer_to_binary(label[i], np.size(label, 0)) return result def convert_integer_to_binary(integer, size): """ Convert an integer into a binary vector with a specified size. :integer: Integer to convert to binary.. :taille: Size of the specified binary vector. :return: The converted binary vector. """ binary = [] while integer > 0: binary.insert(0, integer % 2) integer //= 2 # Fill with zero on the left if necessary to reach the specified size while len(binary) < size: binary.insert(0, 0) return np.array(binary) def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: np.ndarray): """ :w1: weights of the first layer of the network. :b1: bias of the first layer of the network. :w2: weights of the second layer of the network. :b2: bias of the second layer of the network. :data: input vector of the network. :labels_train: output vector for the training of the network. :learning_rate: factor for the gradient descent learning (quickness of the descent). :return: updated weights and biases of the network after 1 loop of gradient descent, and the loss value. """ # Forward pass a0 = data # the data are the input of the first layer z1 = np.matmul(a0, w1) + b1 # input of the hidden layer a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function) z2 = np.matmul(a1, w2) + b2 # input of the output layer a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function) encoded_vector = one_hot(labels_train) dz2 = a2 - encoded_vector dw2 = dz2*a1 db2 = dz2 da1 = dz2*np.sum(w2, axis=1) dz1 = da1*a1*(1-a1) dw1 = dz1*a0 db1 = dz1 w1 -= learning_rate*dw1 w2 -= learning_rate*dw2 b1 -= learning_rate*db1 b2 -= learning_rate*db2 m = np.size(data, 0) loss = (-1/m) * np.sum(labels_train * np.log(a2) + (1 - labels_train) * np.log(1 - a2)) return w1, b1, w2, b2, loss def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_train: np.ndarray, labels_train: np.ndarray, learning_rate: float, num_epoch: int): """ :w1: weights of the first layer of the network. :b1: bias of the first layer of the network. :w2: weights of the second layer of the network. :b2: bias of the second layer of the network. :data_train: input training vector. :labels_train: output training vector. :learning_rate: factor for the gradient descent learning (quickness of the descent). :num_epoch: number of training loops (gradient descent). :return: updated weights and biases of the network after num_epoch loop of gradient descent, accuracy at each loop. """ c=0 accuracies=[] while c<num_epoch: w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate) c+=1 # Forward pass a0 = data_train z1 = np.matmul(a0, w1) + b1 a1 = 1 / (1 + np.exp(-z1)) z2 = np.matmul(a1, w2) + b2 a2 = 1 / (1 + np.exp(-z2)) accuracies = compute_accuracy(a2, labels_train) return w1, b1, w2, b2, accuracies def compute_accuracy(y_predict, y_target): true = 0 for i in range(np.size(y_predict, 0)): if y_predict[i] == y_target[0]: true += 1 return true/np.size(y_predict, 0) def test_mlp(w1: np.ndarray, b1: np.ndarray, w2:np.ndarray, b2:np.ndarray, data_test: np.ndarray, labels_test: np.ndarray): """ :w1: weights of the first layer of the network. :b1: bias of the first layer of the network. :w2: weights of the second layer of the network. :b2: bias of the second layer of the network. :data_test: input testing vector. :labels_train: output testing vector. :return: the accuracy of the test. """ w1, b1, w2, b2, _ = train_mlp(w1, b1, w2, b2, data_test, labels_test) a0 = data_test z1 = np.matmul(a0, w1) + b1 a1 = 1 / (1 + np.exp(-z1)) z2 = np.matmul(a1, w2) + b2 y_predict = 1 / (1 + np.exp(-z2)) test_accuracy = compute_accuracy(y_predict, labels_test) return test_accuracy def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:np.ndarray, labels_test:np.ndarray, d_h: int, learning_rate: float, num_epoch: int): """ :data_train: input training vector. :labels_train: output training vector. :data_test: input testing vector. :labels_test: output testing vector. :d_h: number of neurons on the hidden layer. :learning_rate: factor for the gradient descent learning (quickness of the descent). :num_epoch: number of training loops (gradient descent). :return: the training accuracies across epochs as a list of floats and the final testing accuracy as a float. """ #Number of neurons on the first and the last layer. d_in = np.size(data_train, 1) d_out = np.size(data_test, 0) # Random initialization of the network weights and biaises w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights b1 = np.zeros((1, d_h)) # first layer biaises w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights b2 = np.zeros((1, d_out)) # second layer biaises w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch) w1, b1, w2, b2, final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test) return list_accuracies, final_accuracy