Commit 157f7118 authored by selalimi

Final Update

parent d227b04d
......@@ -90,7 +90,7 @@ Unfortunately, the performance of the KNN algorithm was disappointing, with accu
4. *Lack of Feature Abstraction*: KNN uses raw pixels directly as features. More advanced feature extraction could improve performance (see the sketch below).
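As an illustration of this last point, the sketch below (not part of this repository) reduces the raw CIFAR-10 pixels to a handful of principal components before running KNN. It assumes scikit-learn is available; `pca_knn_accuracy` is a hypothetical helper name.

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

def pca_knn_accuracy(X_train, y_train, X_test, y_test, n_components=100, k=5):
    # Project flattened images onto the top principal components instead of raw pixels
    pca = PCA(n_components=n_components).fit(X_train)
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(pca.transform(X_train), y_train)
    # Return classification accuracy on the reduced test set
    return knn.score(pca.transform(X_test), y_test)
```

Even a modest number of components tends to make the nearest-neighbour distances less sensitive to pixel-level noise, which is one plausible way to lift the accuracy ceiling noted above.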
## Analysis of ANN Results
The deep learning algorithm (ANN) used for our dataset has relatively low performance, with test set accuracy plateauing around 15% over 100 epochs.
The deep learning algorithm (ANN) used for our dataset has relatively low performance, with test set accuracy plateauing around 14% over 100 epochs.
These results suggest that adjustments to certain aspects of the model, such as complexity, hyperparameters, or weight initialization, may be necessary to improve its ability to generalize to new data. Further exploration of these aspects could be beneficial in optimizing model performance.
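One hedged, concrete example of the weight-initialization point: replacing the uniform [-1, 1) initialization used in `mlp.py` with a Xavier-style scheme that scales each layer's weights by 1/sqrt(fan_in). This is only a sketch; `xavier_initialization` is an illustrative name, not a function of this repository.

```python
import numpy as np

def xavier_initialization(d_in, d_h, d_out, seed=10):
    # Scale weights by 1/sqrt(fan_in) so the sigmoid pre-activations stay in a useful range
    rng = np.random.default_rng(seed)
    w1 = rng.normal(0.0, 1.0 / np.sqrt(d_in), size=(d_in, d_h))
    b1 = np.zeros((1, d_h))
    w2 = rng.normal(0.0, 1.0 / np.sqrt(d_h), size=(d_h, d_out))
    b2 = np.zeros((1, d_out))
    return w1, b1, w2, b2
```

Swapping this in (together with a smaller learning rate) would be a low-effort experiment to check whether the ~14% plateau is caused by poorly scaled initial weights rather than by the architecture itself.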
......
Results/mlp.png (image replaced: 36 KiB → 29.8 KiB)
......@@ -2,7 +2,7 @@ import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import read_cifar as rc
......@@ -77,3 +77,15 @@ def plot_KNN(X_train, y_train, X_test, y_test, max_k=20):
plt.ylabel('Accuracy')
plt.title('Variation of Accuracy with K')
plt.savefig("Results/knn.png")
# The following code block is executed only if the script is run as the main program
if __name__ == "__main__":
    # Read the CIFAR-10 dataset from the specified path
    X, y = rc.read_cifar('data/cifar-10-batches-py')
    # Split the dataset into training and testing sets
    X_train, y_train, X_test, y_test = rc.split_dataset(X, y, split=0.9)
    # Plot the evolution of accuracy across the number of neighbors (K) using the 'plot_KNN' function
    plot_KNN(X_train, y_train, X_test, y_test, max_k=20)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
N = 30 # number of input data
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
learning_rate = 0.1
num_epochs=100
# Random initialization of the network weights and biases
def initialization(d_in, d_h, d_out):
    np.random.seed(10) # To get the same random values
    W1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
    b1 = np.zeros((1, d_h)) # first layer biases
    W2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
    b2 = np.zeros((1, d_out)) # second layer biases
    return W1, b1, W2, b2
data = np.random.rand(N, d_in) # create random input data
targets = np.random.rand(N, d_out) # create random targets
# Define the sigmoid activation function (with derivate=True, x is expected to be the already-activated output)
def sigmoid(x, derivate):
    if derivate == False:
        return 1 / (1 + np.exp(-x))
    else:
        return x * (1 - x)
# Define the softmax activation function (with derivate=True, x is expected to be the already-activated output)
def softmax(x, derivate):
    if derivate == False:
        return np.exp(x) / np.exp(np.array(x)).sum(axis=1, keepdims=True)
    else:
        return x * (1 - x)
# Define the loss metrics:
def loss_metrics(predictions, targets, metric, status):
    if metric == "MSE":
        if status == "forward":
            return np.mean((predictions - targets) ** 2)
        elif status == "backward":
            return 2 * (predictions - targets) / len(predictions) # Gradient of MSE loss
    elif metric == "BCE":
        # Binary Cross-Entropy Loss
        epsilon = 1e-15 # Small constant to prevent log(0)
        predictions = np.clip(predictions, epsilon, 1 - epsilon)
        if status == "forward":
            return - (targets * np.log(predictions) + (1 - targets) * np.log(1 - predictions)).mean()
        elif status == "backward":
            return (predictions - targets) / ((1 - predictions) * predictions) # Gradient of BCE loss
    else:
        raise ValueError("Metric not supported: " + metric)
import read_cifar as rc
# learn_once_mse
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
"""
Update the weights and biases of the network for one gradient descent step using Mean Squared Error (MSE) loss.
......@@ -74,42 +24,38 @@ def loss_metrics(predictions, targets, metric, status):
- b2: Updated bias vector of the second layer.
- loss: Mean Squared Error (MSE) loss for monitoring.
"""
def learn_once_mse(W1, b1, W2, b2, data, targets, learning_rate):
a0 = data
# Forward pass
# Calculate the input and output of the hidden layer
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False) # Apply the sigmoid activation
z1 = np.matmul(a0, w1) + b1 # Calculate the weighted sum for the hidden layer
a1 = 1 / (1 + np.exp(-z1)) # Apply the sigmoid activation function to hidden layer
z2 = np.matmul(a1, w2) + b2 # Calculate the weighted sum for the output layer
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True) # Apply the softmax activation to the output layer
predictions = a2 # The network's predictions
# Calculate the input and output of the output layer
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False) # Apply the softmax activation
n = data.shape[0] # Number of samples (batch size)
# Backpropagation phase
# Calculate the error at the output layer
output_error = output_layer_output - targets
# Backpropagation
e2 = predictions - targets # Compute the error in the output layer
# Calculate gradients for the output layer
output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
dw2 = e2 * a2 * (1 - a2) / n # Gradient for w2
update_w2 = np.dot(a1.T, dw2) / n # Update for w2
update_b2 = (1/a1.shape[1])*dw2.sum(axis=0, keepdims=True) # Update for b2
# Update weights and biases of the output layer
W2 = W2 - learning_rate * np.dot(hidden_layer_output.T, output_layer_gradients) / data.shape[0]
b2 = b2 - learning_rate * (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
e1 = np.dot(e2, w2.T) # Compute the error in the hidden layer
dw1 = e1 * a1 * (1 - a1) # Gradient for w1
update_b1 = (1/data.shape[1])*dw1.sum(axis=0, keepdims=True) # Update for b1
update_w1 = np.dot(data.T, dw1) / n # Update for w1
# Calculate the error at the hidden layer
hidden_layer_error = np.dot(output_layer_gradients, W2.T)
# Calculate gradients for the hidden layer
hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
# Update weights and biases of the hidden layer
W1 = W1 - learning_rate * np.dot(data.T, hidden_layer_gradients) / data.shape[0]
b1 = b1 - learning_rate * (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0)
# Gradient descent
w2 = w2 - learning_rate * update_w2
b2 = b2 - learning_rate * update_b2
w1 = w1 - learning_rate * update_w1
b1 = b1 - learning_rate * update_b1
# Calculate the loss using the specified metric
loss = loss_metrics(output_layer_output, targets,metric="MSE",status="forward")
# Calculate the Mean Squared Error (MSE) loss
loss = compute_error(predictions, targets, loss_type = 'MSE')
return W1, b1, W2, b2, loss
return w1, b1, w2, b2, loss
#One Hot Function :
def one_hot(targets):
......@@ -133,9 +79,9 @@ def one_hot(targets):
return one_hot_matrix
#learn_once_cross_entropy
def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
# The function learn_once_cross_entropy:
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate):
"""
Perform one gradient descent step using binary cross-entropy loss.
......@@ -149,57 +95,45 @@ def learn_once_cross_entropy(W1, b1, W2, b2, data, targets, learning_rate):
- Updated weights and biases (W1, b1, W2, b2) of the network.
- Loss value for monitoring.
"""
# Forward pass
# Implement feedforward propagation on the hidden layer
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False) # Apply the Sigmoid activation function
# Implement feedforward propagation on the output layer
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False) # Apply the Softmax activation function
# Backpropagation phase
# Updating W2 and b2
output_error = output_layer_output - targets
dW2 = output_error * softmax(output_layer_output, derivate=True)
W2_update = np.dot(hidden_layer_output.T, dW2) / data.shape[0]
update_b2 = (1 / hidden_layer_output.shape[1]) * dW2.sum(axis=0, keepdims=True)
# Updating W1 and b1
hidden_layer_error = np.dot(dW2, W2.T)
dW1 = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
W1_update = np.dot(data.T, dW1) / data.shape[0]
update_b1 = (1 / data.shape[1]) * dW1.sum(axis=0, keepdims=True)
z1 = np.matmul(data, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
# Gradient descent
W2 = W2 - learning_rate * W2_update
W1 = W1 - learning_rate * W1_update
b2 = b2 - learning_rate * update_b2
b1 = b1 - learning_rate * update_b1
predictions = a2
# Compute loss (Binary Cross Entropy)
loss = loss_metrics(output_layer_output, targets, metric="BCE", status="forward")
one_hot_matrix = one_hot(targets)
return W1, b1, W2, b2, loss
n = data.shape[0]
# Backpropagation
e2 = predictions - one_hot_matrix
dw2 = e2 * a2 * (1 - a2) / n
update_w2 = np.dot(a1.T, dw2) / n
update_b2 = (1/a1.shape[1])*dw2.sum(axis=0, keepdims=True)
def calculate_accuracy(predictions, actual_values):
"""
calculate_accuracy: Compute the accuracy of the model.
e1 = np.dot(e2, w2.T)
dw1 = e1 * a1 * (1 - a1)
update_b1 = (1/data.shape[1])*dw1.sum(axis=0, keepdims=True)
update_w1 = np.dot(data.T, dw1) / n
Parameters:
- predictions: Predicted values.
- actual_values: Ground truth observations.
# Gradient descent
w2 = w2 - learning_rate * update_w2
b2 = b2 - learning_rate * update_b2
w1 = w1 - learning_rate * update_w1
b1 = b1 - learning_rate * update_b1
Returns:
- Accuracy as a float.
"""
correct_predictions = predictions.argmax(axis=1) == actual_values.argmax(axis=1)
accuracy = correct_predictions.mean()
return accuracy
# Calculate binary cross-entropy loss
loss = compute_error(predictions, one_hot_matrix, loss_type = 'binary cross-entropy')
def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
# Calculate the accuracy for a single batch
batch_accuracy = accuracy(predictions, one_hot_matrix)
return w1, b1, w2, b2, loss, batch_accuracy
# The function train_mlp:
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
"""
Perform training steps for a specified number of epochs.
......@@ -215,43 +149,25 @@ def train_mlp(W1, b1, W2, b2, data, targets, learning_rate):
- Updated weights and biases (W1, b1, W2, b2) of the network.
- List of training accuracies across epochs as a list of floats.
"""
train_accuracies = [] # To store training accuracies across epochs
# Forward pass
hidden_layer_input = np.matmul(data, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False)
# Iterate through the specified number of epochs
for epoch in range(num_epoch):
N = data.shape[0]
# Call the 'learn_once_cross_entropy' function to update weights, calculate loss, and obtain batch accuracy
w1, b1, w2, b2, loss, batch_accuracy = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
# Backpropagation phase
output_error = output_layer_output - targets
output_layer_gradients = output_error * softmax(output_layer_output, derivate=True)
# Append the batch accuracy to the 'train_accuracies' list for tracking progress
train_accuracies.append(batch_accuracy)
W2_update = np.dot(hidden_layer_output.T, output_layer_gradients) / N
update_b2 = (1 / hidden_layer_output.shape[1]) * output_layer_gradients.sum(axis=0, keepdims=True)
# Print the current epoch's progress
print("Epoch {}/{}".format(epoch+1, num_epoch))
print("[=======] Train_Accuracies : {}".format(round(batch_accuracy, 5)))
hidden_layer_error = np.dot(output_layer_gradients, W2.T)
hidden_layer_gradients = hidden_layer_error * sigmoid(hidden_layer_output, derivate=True)
return w1, b1, w2, b2, train_accuracies
W1_update = np.dot(data.T, hidden_layer_gradients) / N
update_b1 = (1 / data.shape[1]) * hidden_layer_gradients.sum(axis=0, keepdims=True)
# Gradient descent
W2 = W2 - learning_rate * W2_update
W1 = W1 - learning_rate * W1_update
b2 = b2 - learning_rate * update_b2
b1 = b1 - learning_rate * update_b1
# Calculate loss and accuracy
loss = loss_metrics(output_layer_output, targets,metric="BCE",status="forward")
train_accuracies=calculate_accuracy(output_layer_output, targets)
return W1, b1, W2, b2, loss, train_accuracies
def test_mlp(W1, b1, W2, b2, data_test, labels_test):
# The function test_mlp:
def test_mlp(w1,b1,w2,b2,data_test,labels_test):
"""
Evaluate the network's performance on the test set.
......@@ -263,18 +179,19 @@ def test_mlp(W1, b1, W2, b2, data_test, labels_test):
Returns:
- test_accuracy: The testing accuracy as a float.
"""
# Forward pass
hidden_layer_input = np.matmul(data_test, W1) + b1
hidden_layer_output = sigmoid(hidden_layer_input, derivate=False)
z1 = np.matmul(data_test, w1) + b1
a1 = 1 / (1 + np.exp(-z1))
z2 = np.matmul(a1, w2) + b2
a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True)
output_layer_input = np.matmul(hidden_layer_output, W2) + b2
output_layer_output = softmax(output_layer_input, derivate=False)
# Compute the testing accuracy using the 'accuracy' function
test_accuracy = accuracy(a2, labels_test)
# Compute testing accuracy
test_accuracy = calculate_accuracy(output_layer_output, labels_test)
return test_accuracy
def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_units, learning_rate, num_epochs):
# The function run_mlp_training:
def run_mlp_training(X_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
"""
Train an MLP classifier and evaluate its performance.
......@@ -291,30 +208,31 @@ def run_mlp_training(X_train, labels_train, data_test, labels_test, num_hidden_u
- train_accuracies: List of training accuracies across epochs.
- test_accuracy: The final testing accuracy.
"""
#input_dimension = X_train.shape[1]
#output_dimension = np.unique(labels_train).shape[0] # Number of classes
# Initialize weights and biases
W1, b1, W2, b2 = initialization(d_in, d_h, d_out)
d_in = X_train.shape[1] # Input dimension
d_out = 10 # Output dimension: 10 classes
train_accuracies = [] # List to store training accuracies
np.random.seed(10) # Set a random seed for reproducibility
# Training loop
for epoch in range(num_epochs):
W1, b1, W2, b2, loss, train_accuracy = train_mlp(W1, b1, W2, b2, X_train, one_hot(labels_train), learning_rate)
test_accuracy = test_mlp(W1, b1, W2, b2, data_test, one_hot(labels_test))
train_accuracies.append(train_accuracy)
# Initialize weights and biases for the neural network
w1 = 2 * np.random.rand(d_in, d_h) - 1 # First layer weights
b1 = np.zeros((1, d_h)) # First layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # Second layer weights
b2 = np.zeros((1, d_out)) # Second layer biases
print("Epoch {}/{}".format(epoch + 1, num_epochs))
print("Train Accuracy: {:.6f} Test Accuracy: {:.6f}".format(round(train_accuracy, 6), round(test_accuracy, 6)))
# Train the MLP using the provided training data and parameters
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, X_train, labels_train, learning_rate, num_epoch)
return train_accuracies, test_accuracy
# Test the trained MLP on the testing data and compute the test accuracy
test_accuracy = test_mlp(w1, b1, w2, b2, data_test, one_hot(labels_test))
# plot_ANN
# Print the test set accuracy
print("test accuracy:", test_accuracy)
import matplotlib.pyplot as plt
return train_accuracies, test_accuracy
def plot_ANN(X_train, y_train, X_test, y_test):
# Plot of the evolution of learning accuracy across learning epochs:
def plot_ANN(data_train, labels_train, data_test, labels_test):
"""
Plot the variation of accuracy in terms of the number of epochs.
......@@ -324,29 +242,61 @@ def plot_ANN(X_train, y_train, X_test, y_test):
- X_test: Test data matrix.
- y_test: True labels for the test data.
"""
# Train an MLP and obtain training accuracies and final test accuracy
train_accuracies, test_accuracy = run_mlp_training(X_train, y_train, X_test, y_test, num_hidden_units=64, learning_rate=0.1, num_epochs=100)
# Train the MLP and obtain training accuracies and test accuracy
train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, 64, 0.1, 100)
# Display the test accuracy
print("Test Set Accuracy: {}".format(test_accuracy))
# Create a DataFrame from the accuracy values
df = pd.DataFrame({'Epoch': range(1, len(train_accuracies) + 1), 'Accuracy': train_accuracies})
# Create a Matplotlib plot
plt.plot(list(range(1, len(train_accuracies) + 1)), train_accuracies)
plt.title('Accuracy Variation Over Epochs')
# Create a line plot using Matplotlib
plt.figure(figsize=(10, 6))
plt.plot(df['Epoch'], df['Accuracy'], 'b')
# Add labels and title to the plot
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('The Variation of Accuracy')
# Save the figure (optional)
# Save the plot as an image file
plt.savefig("Results/mlp.png")
# Show the plot (optional)
plt.show()
# Define accuracy function
def accuracy(y_pred, y_true):
    """
    Compute the accuracy of the model.

    Parameters:
    - y_pred: Predicted class scores, one row per sample.
    - y_true: Ground-truth labels, one-hot encoded.

    Returns:
    - Accuracy as a float.
    """
    accuracy = (y_pred.argmax(axis=1) == y_true.argmax(axis=1)).mean()
    return accuracy
def compute_error(predictions, targets, loss_type):
    # Calculate the loss based on the specified loss type
    if loss_type == 'MSE': # Mean Squared Error loss
        loss = np.mean(np.square(predictions - targets))
    elif loss_type == 'binary cross-entropy': # Binary Cross-Entropy loss, averaged element-wise
        loss = -np.mean(targets * np.log(predictions + 1e-7) + (1 - targets) * np.log(1 - predictions + 1e-7))
    else:
        raise ValueError("Unsupported loss type. Use 'MSE' or 'binary cross-entropy'.")
    return loss
# The following code block is executed only if the script is run as the main program
if __name__ == "__main__":
    # Read the CIFAR-10 dataset from the specified path
    X, y = rc.read_cifar('data/cifar-10-batches-py')
    # Split the dataset into training and testing sets
    X_train, y_train, X_test, y_test = rc.split_dataset(X, y, split=0.9)
    # Plot the evolution of learning accuracy across learning epochs using the 'plot_ANN' function
    plot_ANN(X_train, y_train, X_test, y_test)
\ No newline at end of file
File suppressed by a .gitattributes entry, the file's encoding is unsupported, or the file size exceeds the limit.
import numpy as np
# Import the functions from mlp
from mlp import initialization, train_mlp, calculate_accuracy
from mlp import train_mlp, accuracy
def test_mlp_training():
# Test parameters
......@@ -11,6 +11,9 @@ def test_mlp_training():
num_hidden_units = 5
learning_rate = 0.1
num_epochs = 10
d_in= 3
d_out=2
d_h=3
# Generate dummy data for the test
X_train = np.random.randn(num_samples, num_features)
......@@ -19,10 +22,13 @@ def test_mlp_training():
y_test = np.random.randint(0, num_classes, num_samples)
# Initialize the weights and biases
W1, b1, W2, b2 = initialization(num_features, num_hidden_units, num_classes)
w1 = 2 * np.random.rand(d_in, d_h) - 1 # First layer weights
b1 = np.zeros((1, d_h)) # First layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # Second layer weights
b2 = np.zeros((1, d_out)) # Second layer biases
# Train the model
train_accuracies, test_accuracy = train_mlp(W1, b1, W2, b2, X_train, y_train, learning_rate, num_epochs)
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, X_train, y_train, learning_rate, num_epochs)
# Check that each recorded training accuracy is a number between 0 and 1
assert all(0 <= acc <= 1 for acc in train_accuracies)
......