Commit f1d0d979 authored by Bourry Malo

End of neural network MLP

parent 46e8b4cc
from read_cifar import read_cifar, split_dataset
from mlp import run_mlp_training
import matplotlib.pyplot as plt
if __name__ == "__main__":
    data, labels = read_cifar()
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
    list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
    plt.plot(list_accuracies)
    plt.savefig("accuracies_mlp_network")
@@ -12,7 +12,7 @@ def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
    return dists

def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k: int):
    labels_predicts = np.zeros(np.size(dists, 0))
    for i in range(np.size(labels_predicts, 0)):
        # Extract the indices of the k smallest distances (the k nearest neighbours)
        k_neighbors_index = np.argsort(dists[i, :])[:k]
...
@@ -2,7 +2,16 @@ import numpy as np
import math

def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, targets: np.ndarray, learning_rate: float):
"""
:w1: weights of the first layer of the network.
:b1: bias of the first layer of the network.
:w2: weights of the second layer of the network.
:b2: bias of the second layer of the network.
:data: input vector of the network.
:targets: output vector to reach.
:learning_rate: factor for the gradient descent learning (quickness of the descent).
:return: updated weights and biases of the network after 1 loop of gradient descent.
"""
    # Forward pass
    N = np.size(data, 0)
    a0 = data  # the data are the input of the first layer
@@ -14,7 +23,6 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    # Compute loss (MSE)
    loss = np.mean((predictions - targets)**2)

    # Compute the gradients
    da2 = 2 / N * (a2 - targets)
@@ -22,7 +30,76 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    dw2 = np.matmul(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = np.matmul(dz2, w2.T)
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.matmul(a0.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)

    # Gradient descent update of the weights and biases
    w1 -= learning_rate * dw1
    w2 -= learning_rate * dw2
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2

    return w1, b1, w2, b2, loss
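As a rough usage sketch of learn_once_mse (the shapes, seed, and sizes below are illustrative assumptions, not values taken from this repository), a single MSE gradient descent step on a tiny random batch could look like:

rng = np.random.default_rng(0)
data = rng.random((8, 32))      # 8 samples, 32 input features (hypothetical)
targets = rng.random((8, 10))   # 8 samples, 10 target values (hypothetical)
w1, b1 = 2 * rng.random((32, 16)) - 1, np.zeros((1, 16))
w2, b2 = 2 * rng.random((16, 10)) - 1, np.zeros((1, 10))
w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate=0.1)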
def one_hot(label: np.ndarray):
    """
    One-hot encode a sequence of integer labels.
    :label: the sequence of integer labels to encode.
    :return: the encoded matrix, one row per label.
    """
    num_classes = int(np.max(label)) + 1
    result = np.zeros((np.size(label, 0), num_classes))
    result[np.arange(np.size(label, 0)), label] = 1
    return result
def convert_integer_to_binary(integer, size):
    """
    Convert an integer into a binary vector of a specified size.
    :integer: integer to convert to binary.
    :size: size of the returned binary vector.
    :return: the converted binary vector.
    """
    binary = []
    while integer > 0:
        binary.insert(0, integer % 2)
        integer //= 2
    # Fill with zeros on the left if necessary to reach the specified size
    while len(binary) < size:
        binary.insert(0, 0)
    return np.array(binary)
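As a quick illustration of the two encoders above (the inputs are arbitrary example values):

one_hot(np.array([0, 2, 1]))       # -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
convert_integer_to_binary(5, 4)    # -> [0, 1, 0, 1]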
def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data: input vector of the network.
    :labels_train: integer class labels used to train the network.
    :learning_rate: learning rate (step size) of the gradient descent.
    :return: updated weights and biases of the network after one gradient descent step, and the loss value.
    """
    # Forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
    # Backward pass
    encoded_vector = one_hot(labels_train)
    dz2 = a2 - encoded_vector
    dw2 = np.matmul(a1.T, dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = np.matmul(dz2, w2.T)
    dz1 = da1 * a1 * (1 - a1)
    dw1 = np.matmul(a0.T, dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
@@ -31,5 +108,94 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
    b1 -= learning_rate * db1
    b2 -= learning_rate * db2

    m = np.size(data, 0)
    loss = (-1 / m) * np.sum(encoded_vector * np.log(a2) + (1 - encoded_vector) * np.log(1 - a2))

    return w1, b1, w2, b2, loss
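A note on the dz2 = a2 - encoded_vector shortcut above: with a sigmoid output a = 1/(1 + exp(-z)) and binary cross-entropy loss, the derivative of the loss with respect to z simplifies to a - y. The lines below are a small, self-contained finite-difference check of that identity (the values of z, y, and eps are arbitrary):

z, y, eps = 0.3, 1.0, 1e-6
bce = lambda v: -(y * np.log(1 / (1 + np.exp(-v))) + (1 - y) * np.log(1 - 1 / (1 + np.exp(-v))))
numeric = (bce(z + eps) - bce(z - eps)) / (2 * eps)  # numerical derivative of the loss
analytic = 1 / (1 + np.exp(-z)) - y                  # the a - y shortcut
# numeric and analytic agree up to rounding error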
def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_train: np.ndarray, labels_train: np.ndarray, learning_rate: float, num_epoch: int):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data_train: input training vector.
    :labels_train: output training vector.
    :learning_rate: learning rate (step size) of the gradient descent.
    :num_epoch: number of training loops (gradient descent steps).
    :return: updated weights and biases of the network after num_epoch gradient descent steps, and the training accuracy at each epoch.
    """
    accuracies = []
    for _ in range(num_epoch):
        w1, b1, w2, b2, _ = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)

        # Forward pass with the updated weights to measure the training accuracy
        a0 = data_train
        z1 = np.matmul(a0, w1) + b1
        a1 = 1 / (1 + np.exp(-z1))
        z2 = np.matmul(a1, w2) + b2
        a2 = 1 / (1 + np.exp(-z2))
        accuracies.append(compute_accuracy(np.argmax(a2, axis=1), labels_train))

    return w1, b1, w2, b2, accuracies
def compute_accuracy(y_predict, y_target):
    true = 0
    for i in range(np.size(y_predict, 0)):
        if y_predict[i] == y_target[i]:
            true += 1
    return true / np.size(y_predict, 0)
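compute_accuracy compares already-decoded class predictions against the target labels, so callers take the argmax of the network output first; a tiny illustrative call (arbitrary values):

compute_accuracy(np.array([1, 0, 2]), np.array([1, 2, 2]))  # -> 2/3, two of three predictions match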
def test_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
    """
    :w1: weights of the first layer of the network.
    :b1: bias of the first layer of the network.
    :w2: weights of the second layer of the network.
    :b2: bias of the second layer of the network.
    :data_test: input testing vector.
    :labels_test: output testing vector.
    :return: the accuracy of the test.
    """
    # Forward pass with the trained weights (no further training on the test set)
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = 1 / (1 + np.exp(-z1))
    z2 = np.matmul(a1, w2) + b2
    y_predict = 1 / (1 + np.exp(-z2))

    test_accuracy = compute_accuracy(np.argmax(y_predict, axis=1), labels_test)
    return test_accuracy
def run_mlp_training(data_train: np.ndarray, labels_train: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray, d_h: int, learning_rate: float, num_epoch: int):
    """
    :data_train: input training vector.
    :labels_train: output training vector.
    :data_test: input testing vector.
    :labels_test: output testing vector.
    :d_h: number of neurons on the hidden layer.
    :learning_rate: learning rate (step size) of the gradient descent.
    :num_epoch: number of training loops (gradient descent steps).
    :return: the training accuracies across epochs as a list of floats and the final testing accuracy as a float.
    """
    # Number of neurons on the input, hidden and output layers
    d_in = np.size(data_train, 1)
    d_out = int(np.max(labels_train)) + 1  # number of classes

    # Random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases

    w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return list_accuracies, final_accuracy
@@ -51,4 +51,3 @@ def split_dataset(data: np.ndarray, labels: np.ndarray, split: float):

if __name__ == "__main__":
    data, labels = read_cifar()
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)