Commit 7612de50 authored by Duperret Loris
parent 3a8d858d
Artificial Neural Network
Chain rule for computing the gradient:
2. ∂C/∂A(2) = 2/Nout * (A(2) - Y)
3. ∂C/∂Z(2) = ∂C/∂A(2) * ∂A(2)/∂Z(2)
∂C/∂Z(2) = ∂C/∂A(2) * σ'(Z(2))
∂C/∂Z(2) = ∂C/∂A(2) * σ(Z(2)) * (1-σ(Z(2)))
∂C/∂Z(2) = ∂C/∂A(2) * A(2) * (1 - A(2))
4. ∂C/∂W(2) = ∂C/∂Z(2) * ∂Z(2)/∂W(2)
∂C/∂W(2) = ∂C/∂Z(2) * A(1)
5. ∂C/∂B(2) = ∂C/∂Z(2) * ∂Z(2)/∂B(2)
∂C/∂B(2) = sum(∂C/∂Z(2), axis=0)
6. ∂C/∂A(1) = ∂C/∂Z(2) * ∂Z(2)/∂A(1)
∂C/∂A(1) = ∂C/∂Z(2) * W(2)
7. ∂C/∂Z(1) = ∂C/∂A(1) * σ'(Z(1))
∂C/∂Z(1) = ∂C/∂A(1) * A(1) * (1 - A(1))
8. ∂C/∂W(1) = ∂C/∂Z(1) * A(0)
9. ∂C/∂B(1) = sum(∂C/∂Z(1), axis=0)
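A minimal NumPy sketch of one backward pass written exactly as steps 2-9 above; the shapes, random values, and variable names are illustrative assumptions, not the project's actual data:

import numpy as np

rng = np.random.default_rng(0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Toy shapes: 5 samples, 3 inputs, 4 hidden units, Nout = 2 outputs.
A0, Y = rng.random((5, 3)), rng.random((5, 2))
W1, B1 = rng.random((3, 4)), np.zeros((1, 4))
W2, B2 = rng.random((4, 2)), np.zeros((1, 2))

# Forward pass.
A1 = sigmoid(A0 @ W1 + B1)
A2 = sigmoid(A1 @ W2 + B2)

dA2 = 2 / Y.shape[1] * (A2 - Y)        # step 2
dZ2 = dA2 * A2 * (1 - A2)              # step 3
dW2 = A1.T @ dZ2                       # step 4
dB2 = dZ2.sum(axis=0, keepdims=True)   # step 5
dA1 = dZ2 @ W2.T                       # step 6
dZ1 = dA1 * A1 * (1 - A1)              # step 7
dW1 = A0.T @ dZ1                       # step 8
dB1 = dZ1.sum(axis=0, keepdims=True)   # step 9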
Looking at the knn plot, the maximum accuracy is reached for k = 1, which seems logical since each point then simply takes the label of its single nearest neighbour.
The other accuracy peak sits around k = 7, so these values should be preferred to get the best possible accuracy.
The execution time stays stable around 28 s across the different values of k.
Looking at the mlp plot, the accuracy oscillates around a value close to 0.1 with occasional spikes, i.e. no better than random guessing among the 10 CIFAR-10 classes.
We can therefore conclude that the learning is poor, since the overall trend is a stagnating accuracy.
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
def distance_matrix(matrix1, matrix2):
    # Squared norms of each row in the input matrices
    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
    norms2 = np.sum(matrix2**2, axis=1, keepdims=True)
    # Dot product between the matrices
    dot_product = np.dot(matrix1, matrix2.T)
    # L2 Euclidean distances via ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
    dists = np.sqrt(norms1 - 2 * dot_product + norms2.T)
    return dists
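# Sanity check (illustrative, not part of the assignment code): the vectorised
# formula matches a naive double loop over rows.
_rng = np.random.default_rng(0)
_A, _B = _rng.random((4, 3)), _rng.random((6, 3))
_naive = np.array([[np.linalg.norm(a - b) for b in _B] for a in _A])
assert np.allclose(distance_matrix(_A, _B), _naive)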
def knn_predict(dists, labels_train, k):
    # Number of test samples
    num_test_samples = dists.shape[0]
    # Array to store the predicted labels
    predicted_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)
    for i in range(num_test_samples):
        # Distances from the current test sample to every training sample
        distances = dists[i]
        # Indices of the k nearest neighbours
        k_nearest_indices = np.argsort(distances)[:k]
        # Labels of those neighbours
        k_nearest_labels = labels_train[k_nearest_indices]
        # Count the occurrences of each label and keep the most frequent one
        predicted_labels[i] = np.argmax(np.bincount(k_nearest_labels))
    return predicted_labels
def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
    # Predict with knn_predict, then compare the predictions to the true labels
    predicted_labels = knn_predict(distance_matrix(data_test, data_train), labels_train, k)
    accuracy = accuracy_score(labels_test, predicted_labels)
    return accuracy
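# Minimal usage sketch (illustrative synthetic data): two well-separated 2-D
# Gaussian clusters, on which k-NN should be near-perfect.
_rng = np.random.default_rng(0)
_train = np.vstack([_rng.normal(0, 1, (50, 2)), _rng.normal(5, 1, (50, 2))])
_train_labels = np.array([0] * 50 + [1] * 50)
_test = np.vstack([_rng.normal(0, 1, (10, 2)), _rng.normal(5, 1, (10, 2))])
_test_labels = np.array([0] * 10 + [1] * 10)
print(evaluate_knn(_train, _train_labels, _test, _test_labels, k=3))  # expect ~1.0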
import read_cifar
import knn
import matplotlib.pyplot as plt
import mlp
import time
split = 0.9
d_h = 64
learning_rate = 0.1
num_epochs = 100
batch_path = "data/cifar-10-python/cifar-10-batches-py"
data, labels = read_cifar.read_cifar(batch_path)
data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split)
"""k_values = range(1, 21)
k_values = range(1, 21)
accuracies = []
times = []
for k in k_values:
start_time = time.time()
accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, k)
end_time = time.time()
execution_time=end_time-start_time
times.append(execution_time)
accuracies.append(accuracy)
print(f"Accuracy for k={k}: {accuracy:.2f}, Time: {execution_time:.2f}s")
plt.figure(figsize=(8, 6))
plt.plot(k_values, accuracies, marker='o')
plt.title('KNN Accuracy vs. k')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.xticks(k_values)
plt.grid(True)
# On enregistre le graphique dans Results
plt.savefig('results/knn.png')
plt.show()
plt.show()"""
plt.figure(figsize=(8, 6))
plt.plot(k_values, times, marker='o')
plt.title('Execution time vs. k')
plt.xlabel('k')
plt.ylabel('Time (s)')
plt.xticks(k_values)
plt.grid(True)
plt.savefig('results/time_knn.png')
plt.show()
train_accuracies, test_accuracy = mlp.run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs)

def plot_learning_accuracy(train_accuracies):
    # (The first lines of this function are elided in the diff; the figure and
    # plot calls below mirror the knn plot above.)
    plt.figure(figsize=(8, 6))
    plt.plot(range(1, len(train_accuracies) + 1), train_accuracies, marker='o')
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy")
    plt.title("MLP Training Accuracy")
    plt.savefig("results/mlp.png")

plot_learning_accuracy(train_accuracies)
import numpy as np
import matplotlib.pyplot as plt
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
    # x is expected to be an activation a = sigmoid(z), so this returns a * (1 - a)
    return x * (1 - x)
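# Quick sanity check (illustrative): sigmoid_derivative expects an activation
# a = sigmoid(z), since sigma'(z) = sigma(z) * (1 - sigma(z)).
_z = np.linspace(-3, 3, 7)
_h = 1e-6
assert np.allclose(sigmoid_derivative(sigmoid(_z)),
                   (sigmoid(_z + _h) - sigmoid(_z - _h)) / (2 * _h), atol=1e-6)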
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    # Forward pass
    a0 = data
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = sigmoid(z2)
    predictions = a2

    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))

    # Backpropagation (the output-layer lines are elided in the diff; they are
    # reconstructed from steps 2-3 of the chain rule above)
    delta_a2 = 2 / targets.shape[1] * (predictions - targets)
    delta_z2 = delta_a2 * sigmoid_derivative(a2)
    delta_a1 = np.matmul(delta_z2, w2.T)
    delta_z1 = delta_a1 * sigmoid_derivative(a1)

    # Update weights and biases
    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

    return w1, b1, w2, b2, loss
def softmax(z):
    # Subtract the row-wise max to avoid numerical instability in the loss
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def one_hot(labels):
    n_classes = len(np.unique(labels))
    one_hot_matrix = np.zeros((labels.shape[0], n_classes))
    for i, label in enumerate(labels):
        one_hot_matrix[i, label] = 1
    return one_hot_matrix
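# Quick illustration of the two helpers on toy values:
_logits = np.array([[2.0, 1.0, 0.1], [0.5, 0.5, 3.0]])
print(softmax(_logits).sum(axis=1))   # each row sums to 1
print(one_hot(np.array([0, 2, 1])))   # rows select columns 0, 2, 1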
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    # Forward pass
    a0 = data
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = softmax(z2)
    predictions = a2

    # Compute loss (cross-entropy)
    one_hot_labels = one_hot(labels_train)
    epsilon = 1e-9
    # Clip the predictions to avoid log(0) and the resulting loss instabilities
    predictions = np.clip(predictions, epsilon, 1 - epsilon)
    loss = -np.mean(one_hot_labels * np.log(predictions))

    # Backpropagation
    delta_z2 = a2 - one_hot_labels
    delta_a1 = np.matmul(delta_z2, w2.T)
    delta_z1 = delta_a1 * sigmoid_derivative(a1)

    # Update weights and biases
    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)

    return w1, b1, w2, b2, loss
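# Why delta_z2 = a2 - one_hot_labels: for softmax followed by the cross-entropy
# C = -sum(y * log(p)), the chain rule telescopes to dC/dz = p - y. A small
# finite-difference check of that identity on toy values (illustrative only):
_zc = np.array([0.3, -1.2, 0.8])
_yc = np.array([0.0, 1.0, 0.0])

def _ce_loss(z):
    p = np.exp(z - z.max()) / np.exp(z - z.max()).sum()
    return -np.sum(_yc * np.log(p))

_pc = np.exp(_zc - _zc.max()) / np.exp(_zc - _zc.max()).sum()
_hc = 1e-6
_num = np.array([(_ce_loss(_zc + _hc * e) - _ce_loss(_zc - _hc * e)) / (2 * _hc)
                 for e in np.eye(3)])
assert np.allclose(_pc - _yc, _num, atol=1e-5)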
def compute_accuracy(predictions, labels):
    # Fraction of argmax predictions matching the integer labels (the definition
    # is not shown in the diff; this mirrors the argmax comparison from the old
    # test_mlp body)
    return np.mean(np.argmax(predictions, axis=1) == labels)

def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs):
    train_accuracies = []
    for epoch in range(num_epochs):
        # One full-batch gradient step
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)

        # Compute the training accuracy for this epoch
        a0 = data_train
        z1 = np.matmul(a0, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.matmul(a1, w2) + b2
        a2 = softmax(z2)
        predictions = a2

        train_accuracy = compute_accuracy(predictions, labels_train)
        train_accuracies.append(train_accuracy)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # Forward pass on the test set
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(a1, w2) + b2
    a2 = softmax(z2)
    predictions = a2
    test_accuracy = compute_accuracy(predictions, labels_test)
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
    # Input size from the data, 10 output classes for CIFAR-10 (the w1/b1
    # initialisation is elided in the diff; it mirrors the w2/b2 lines below)
    d_in = data_train.shape[1]
    d_out = 10
    w1 = 2 * np.random.rand(d_in, d_h) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * np.random.rand(d_h, d_out) - 1
    b2 = np.zeros((1, d_out))
    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
import numpy as np
import pickle

def read_cifar_batch(batch_path):
    with open(batch_path, 'rb') as file:
        batch_data = pickle.load(file, encoding='bytes')
    data = batch_data[b'data']      # CIFAR-10 image data
    labels = batch_data[b'labels']  # Class labels
    # Convert data and labels to the desired types
    data = np.array(data, dtype=np.float32)
    labels = np.array(labels, dtype=np.int64)
    return data, labels
def read_cifar(directory_path):
    # Load the five training batches plus the test batch and stack them (the
    # body is elided in the diff; CIFAR-10's batch file names are standard)
    batch_names = [f"data_batch_{i}" for i in range(1, 6)] + ["test_batch"]
    all_data, all_labels = [], []
    for name in batch_names:
        batch_data, batch_labels = read_cifar_batch(f"{directory_path}/{name}")
        all_data.append(batch_data)
        all_labels.append(batch_labels)
    return np.concatenate(all_data), np.concatenate(all_labels)
def split_dataset(data, labels, split):
    # Number of samples in the dataset
    num_samples = len(data)
    # Number of training and test samples
    num_train_samples = int(num_samples * split)
    num_test_samples = num_samples - num_train_samples
    # Random permutation of the indices to shuffle the dataset
    shuffle_indices = np.random.permutation(num_samples)
    # Apply the permutation, then split: first num_train_samples rows for
    # training, the remaining num_test_samples rows for testing
    shuffled_data = data[shuffle_indices]
    shuffled_labels = labels[shuffle_indices]
    data_train = shuffled_data[:num_train_samples]
    labels_train = shuffled_labels[:num_train_samples]
    data_test = shuffled_data[num_train_samples:]
    labels_test = shuffled_labels[num_train_samples:]
    return data_train, labels_train, data_test, labels_test
if __name__ == '__main__':
    batch_path = "data/cifar-10-python/cifar-10-batches-py"
    data, labels = read_cifar(batch_path)
    print("Data shape:", data.shape)
    print("Labels shape:", labels.shape)
    split = 0.9
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
Updated result figures:
results/knn.png: KNN accuracy vs. k (31.1 KiB → 39 KiB)
results/mlp.png: MLP training accuracy (30.9 KiB → 49 KiB)
results/time_knn.png: execution time vs. k (new, 35.3 KiB)