Commit f4bb33e2 authored by NicolasJaune

debug + final push

parent 7f0bf235
-from read_cifar import read_cifar, split_dataset
+from read_cifar import read_cifar_batch, split_dataset
 from mlp import run_mlp_training
 import matplotlib.pyplot as plt
 if __name__ == "__main__":
-    data, labels = read_cifar()
+    data, labels = read_cifar_batch("data_batch_1")  # we test the MLP on a single batch (otherwise too slow)
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
     list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
     plt.plot(list_accuracies)
+    plt.xlabel('Number of epochs')
+    plt.ylabel('Accuracy')
+    plt.grid(True)
+    plt.savefig('results/mlp.png')
     plt.show()
-    #plt.savefig("mlp.png")
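A note on the reordering above: `plt.savefig` must come before `plt.show()`, since closing the window opened by `plt.show()` clears the current figure, so a `savefig` placed after it writes out an empty image. A minimal sketch of the pattern (the accuracy values are placeholders, and the `results` directory is assumed to exist as in this repo):

    import os
    import matplotlib.pyplot as plt

    accuracies = [0.10, 0.14, 0.17]        # placeholder values, not real training output
    os.makedirs('results', exist_ok=True)  # create the output folder if it is missing
    plt.plot(accuracies)
    plt.savefig('results/mlp.png')         # save first...
    plt.show()                             # ...then display; the figure is cleared on close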
 import numpy as np
-from read_cifar import read_cifar, split_dataset
+from read_cifar import split_dataset, read_cifar_batch
 import matplotlib.pyplot as plt
 def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
@@ -15,7 +15,7 @@ def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k:int):
     labels_predicts = np.zeros(np.size(dists, 0))
     for i in range(np.size(labels_predicts, 0)):
         # extract the indices of the k smallest values (the k nearest neighbors)
-        k_neighbors_index = np.argmin(dists[i, :], np.sort(dists[i, :])[:k])
+        k_neighbors_index = np.argpartition(dists[i, :], k)[:k]
         # find the most frequent class among the k nearest neighbors
         labels_k_neighbors = labels_train[k_neighbors_index]
         # count the occurrences of each class among the k
@@ -36,10 +36,14 @@ def evaluate_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.n
     return accuracy
 def plot_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.ndarray, labels_test:np.ndarray, n: int):
-    accuracy_vector = np.zeros(n)
+    accuracy_vector = np.zeros(n+1)
     for k in range(1, n+1):
-        accuracy_vector[k] = evaluate_knn(data_train, labels_train, data_test, labels_test)
+        accuracy_vector[k] = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
     plt.plot(accuracy_vector)
+    plt.xlabel('Number of Neighbors')
+    plt.ylabel('Accuracy')
+    plt.grid(True)
+    plt.savefig('results/knn.png')
     plt.show()
     return
@@ -48,7 +52,8 @@ def plot_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.ndarr
 if __name__ == "__main__":
-    data, labels = read_cifar()
+    data, labels = read_cifar_batch("data_batch_1")  # we test the KNN on a single batch (otherwise too slow)
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.8)
     k = 5  # number of neighbors
     accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+    plot_knn(data_train, labels_train, data_test, labels_test, 20)
\ No newline at end of file
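The old `np.argmin(dists[i, :], np.sort(dists[i, :])[:k])` call was invalid: `np.argmin`'s second argument is an axis, not an array, so it failed at runtime. `np.argpartition(dists[i, :], k)[:k]` returns the indices of the k smallest distances (in arbitrary order) without fully sorting the row. A minimal sketch of the prediction step on made-up toy data, with a bincount majority vote standing in for the counting code that sits outside the visible hunk:

    import numpy as np

    dists_row = np.array([4.0, 0.5, 2.0, 0.1, 3.0])  # distances from one test point to 5 train points
    labels_train = np.array([0, 1, 2, 1, 0])
    k = 3

    k_neighbors_index = np.argpartition(dists_row, k)[:k]    # indices of the 3 smallest distances
    labels_k_neighbors = labels_train[k_neighbors_index]     # their classes: 1, 1, 2 (in some order)
    prediction = np.argmax(np.bincount(labels_k_neighbors))  # majority vote -> class 1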
@@ -45,15 +45,21 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
 def one_hot(label: np.ndarray):
     """
-    Encode a sequence of integers in binary: one-hot encoder.
+    Encode the labels by putting a 1 at the index corresponding to the label's class, and 0 elsewhere.
     :label: the sequence of integers to encode.
     :return: the encoded matrix.
     """
-    result = np.zeros((np.size(label, 0), np.size(label, 0)))
-    for i in range(np.size(label, 0)):
-        result[i] = convert_integer_to_binary(label[i], np.size(label, 0))
-    return result
+    num_classes = np.max(label) + 1
+    one_hot_matrix = np.eye(num_classes)[label]
+    return one_hot_matrix
+
+def decode_class(encoded_labels: np.ndarray):
+    """
+    Decode a matrix encoded with one_hot and return a vector containing the corresponding classes.
+    """
+    return np.argmax(encoded_labels, axis=1)

 def convert_integer_to_binary(integer, size):
     """
@@ -74,6 +80,11 @@ def convert_integer_to_binary(integer, size):
     return np.array(binary)

+def softmax(x):
+    # subtract the row maximum for numerical stability, then normalize per row
+    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return e / e.sum(axis=-1, keepdims=True)
+
+def sigmoid(z):
+    return 1 / (1 + np.exp(-np.clip(z, -30, 30)))  # clip to avoid overflow

 def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: float):
     """
@@ -90,18 +101,18 @@ def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2:
     # Forward pass
     a0 = data  # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2  # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    a2 = softmax(z2)  # output of the output layer (softmax activation function)

-    encoded_vector = one_hot(labels_train)
-    dz2 = a2 - encoded_vector
-    dw2 = dz2*a1
-    db2 = dz2
-    da1 = dz2*np.sum(w2, axis=1)
+    encoded_labels_train = one_hot(labels_train)
+    dz2 = a2 - encoded_labels_train
+    dw2 = np.matmul(a1.T, dz2)
+    db2 = np.sum(dz2, axis=0, keepdims=True)
+    da1 = np.matmul(dz2, w2.T)
     dz1 = da1*a1*(1-a1)
-    dw1 = dz1*a0
-    db1 = dz1
+    dw1 = np.matmul(data.T, dz1)
+    db1 = np.sum(dz1, axis=0, keepdims=True)

     w1 -= learning_rate*dw1
     w2 -= learning_rate*dw2
@@ -109,7 +120,8 @@ def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2:
     b2 -= learning_rate*db2
     m = np.size(data, 0)
-    loss = (-1/m) * np.sum(labels_train * np.log(a2) + (1 - labels_train) * np.log(1 - a2))
+    eps = 10**(-9)
+    loss = (-1/m) * np.sum(encoded_labels_train * np.log(a2 + eps) + (1 - encoded_labels_train) * np.log(1 - a2 + eps))
     return w1, b1, w2, b2, loss
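The backward-pass fix replaces elementwise products with matrix products so the gradients accumulate correctly over the whole batch: with `data` of shape (m, d_in), `a1` of shape (m, d_h) and `dz2` of shape (m, d_out), `np.matmul(a1.T, dz2)` has shape (d_h, d_out), matching `w2`, while the bias gradients sum over the batch axis. A shape sanity check with arbitrary toy dimensions:

    import numpy as np

    m, d_in, d_h, d_out = 8, 5, 4, 3          # toy sizes, not the CIFAR ones
    data = np.random.rand(m, d_in)
    a1 = np.random.rand(m, d_h)
    dz2 = np.random.rand(m, d_out)
    w2 = np.random.rand(d_h, d_out)

    dw2 = np.matmul(a1.T, dz2)                # (d_h, d_out), same shape as w2
    db2 = np.sum(dz2, axis=0, keepdims=True)  # (1, d_out), same shape as b2
    da1 = np.matmul(dz2, w2.T)                # (m, d_h), same shape as a1
    dz1 = da1 * a1 * (1 - a1)                 # elementwise through the sigmoid derivative
    dw1 = np.matmul(data.T, dz1)              # (d_in, d_h), same shape as w1
    assert dw2.shape == w2.shape and da1.shape == a1.shape and dw1.shape == (d_in, d_h)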
@@ -134,18 +146,15 @@ def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, da
         # Forward pass
         a0 = data_train
         z1 = np.matmul(a0, w1) + b1
-        a1 = 1 / (1 + np.exp(-z1))
+        a1 = sigmoid(z1)
         z2 = np.matmul(a1, w2) + b2
-        a2 = 1 / (1 + np.exp(-z2))
+        a2 = softmax(z2)
-        accuracies = compute_accuracy(a2, labels_train)
+        predictions = decode_class(a2)
+        accuracies.append(compute_accuracy(predictions, labels_train))
     return w1, b1, w2, b2, accuracies

 def compute_accuracy(y_predict, y_target):
-    true = 0
-    for i in range(np.size(y_predict, 0)):
-        if y_predict[i] == y_target[0]:
-            true += 1
-    return true/np.size(y_predict, 0)
+    return np.mean(y_target == y_predict)

 def test_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
     """
@@ -157,16 +166,13 @@ def test_mlp(w1: np.ndarray, b1: np.ndarray, w2:np.ndarray, b2:np.ndarray, data_
     :labels_test: output testing vector.
     :return: the accuracy of the test.
     """
-    w1, b1, w2, b2, _ = train_mlp(w1, b1, w2, b2, data_test, labels_test)
     a0 = data_test
     z1 = np.matmul(a0, w1) + b1
-    a1 = 1 / (1 + np.exp(-z1))
+    a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
-    y_predict = 1 / (1 + np.exp(-z2))
-    test_accuracy = compute_accuracy(y_predict, labels_test)
+    a2 = softmax(z2)
+    predictions = decode_class(a2)
+    test_accuracy = compute_accuracy(predictions, labels_test)
     return test_accuracy

@@ -183,7 +189,7 @@ def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:n
     """
     # Number of neurons in the first and last layers.
     d_in = np.size(data_train, 1)
-    d_out = np.size(data_test, 0)
+    d_out = np.max(labels_train) + 1
     # Random initialization of the network weights and biases
     w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
@@ -192,7 +198,7 @@ def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:n
     b2 = np.zeros((1, d_out))  # second layer biases
     w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
-    w1, b1, w2, b2, final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
     return list_accuracies, final_accuracy
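With `d_out` now derived from the labels instead of the test-set size, the whole pipeline can be smoke-tested on random data without loading CIFAR; the sizes below are arbitrary, and this only checks that shapes and return values line up, not that the accuracy means anything:

    import numpy as np
    from mlp import run_mlp_training

    rng = np.random.default_rng(0)
    data_train = rng.random((50, 12))      # 50 fake samples, 12 features
    labels_train = rng.integers(0, 3, 50)  # 3 fake classes
    data_test = rng.random((10, 12))
    labels_test = rng.integers(0, 3, 10)

    accs, final = run_mlp_training(data_train, labels_train, data_test, labels_test,
                                   d_h=8, learning_rate=0.1, num_epoch=10)
    print(len(accs), final)  # 10 per-epoch training accuracies and one test accuracy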
@@ -18,7 +18,7 @@ def read_cifar():
     # We read the first 5 batches
     data = []
     labels = []
-    for i in range(3):
+    for i in range(5):
         path_batch = directory + '/data_batch_' + str(i+1)
         with open(path_batch, 'rb') as fo:
             dict = pickle.load(fo, encoding='bytes')
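`read_cifar_batch`, which the two test scripts above rely on, is defined elsewhere in read_cifar.py and not shown in this diff. For reference, a typical implementation following the CIFAR-10 file format (each batch file is a pickled dict holding b'data' as 10000x3072 pixel rows and b'labels' as a list of ints) might look like the sketch below; the repo's actual version may differ, e.g. in dtype choices:

    import pickle
    import numpy as np

    def read_cifar_batch(path):
        # each CIFAR-10 batch file is a pickled dict with b'data' and b'labels'
        with open(path, 'rb') as fo:
            batch = pickle.load(fo, encoding='bytes')
        data = np.array(batch[b'data'], dtype=np.float32)    # (10000, 3072) pixel rows
        labels = np.array(batch[b'labels'], dtype=np.int64)  # (10000,) class ids 0..9
        return data, labels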
File added: results/knn.png (23.1 KiB)
File added: results/mlp.png (16.9 KiB)
