import numpy as np
import matplotlib.pyplot as plt
from read_cifar import read_cifar_batch, read_cifar, split_dataset
N = 30 # number of samples in the toy dataset
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
# Random initialization of the network weights and biases
w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
b1 = np.zeros((1, d_h)) # first layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
b2 = np.zeros((1, d_out)) # second layer biases
data = np.random.rand(N, d_in) # random input data
targets = np.random.rand(N, d_out) # random targets
def sigmoid(x):
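    """Sigmoid activation function; the input is clipped to avoid overflow in np.exp."""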
return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
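    """Perform one gradient-descent step of the two-layer network using the MSE loss.

    Returns the updated weights and biases (w1, b1, w2, b2) and the loss value.
    """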
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = sigmoid(z2) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
    # Compute loss (MSE)
    N = data.shape[0] # number of samples in the batch
    loss = np.mean(np.square(predictions - targets))
    print(loss)
    # Backpropagation
    dC_da2 = 2 * (predictions - targets) / N # elementwise gradient of the loss w.r.t. the output activations
dC_dz2 = dC_da2 * predictions * (1 - predictions)
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0)
# Update weights and biases
w2 = w2 - learning_rate * dC_dw2
b2 = b2 - learning_rate * dC_db2
w1 = w1 - learning_rate * dC_dw1
b1 = b1 - learning_rate * dC_db1
return w1, b1, w2, b2, loss
# Convert a vector of integer labels into a one-hot matrix
def one_hot(labels):
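    """Encode a vector of integer labels into a (n_samples, n_classes) one-hot matrix."""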
n_samples = len(labels)
n_unique = len(np.unique(labels))
one_hot_matrix = np.zeros((n_samples, n_unique))
one_hot_matrix[np.arange(n_samples), labels] = 1
return one_hot_matrix
def softmax(z):
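    """Row-wise softmax: convert each row of scores into a probability distribution."""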
    # Shift by the row-wise maximum for numerical stability, then normalize each row
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
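    """Perform one gradient-descent step using a softmax output layer and the cross-entropy loss.

    Returns the updated weights and biases (w1, b1, w2, b2) and the loss value.
    """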
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = softmax(z2) #Softmax activation layer
predict = a2 # the predicted values are the outputs of the output layer
    targets = one_hot(labels_train) # one-hot encode the integer labels
    N = data.shape[0] # number of samples in the batch
    # Compute the loss (cross-entropy averaged over the batch, with a small epsilon to avoid log(0))
    loss = -np.mean(np.sum(targets * np.log(predict + 1e-9), axis=1))
    # Backpropagation
    # With a softmax output and a cross-entropy loss, the gradient w.r.t. z2 simplifies to (a2 - targets) / N
    dC_dz2 = (predict - targets) / N
    dC_dw2 = np.dot(a1.T, dC_dz2)
    dC_db2 = np.sum(dC_dz2, axis=0)
dC_da1 = np.dot(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.dot(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0)
# Update weights and biases
w2 = w2 - learning_rate * dC_dw2
b2 = b2 - learning_rate * dC_db2
w1 = w1 - learning_rate * dC_dw1
b1 = b1 - learning_rate * dC_db1
return w1, b1, w2, b2, loss
def accuracy(labels, predictions):
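    """Fraction of predictions that match the ground-truth labels."""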
return np.mean(labels == predictions)
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
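    """Train the network for num_epoch epochs and record the training accuracy after each epoch.

    Returns the trained weights and biases and the list of per-epoch training accuracies.
    """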
    train_accuracies = []
    for i in range(num_epoch):
        # Perform one gradient-descent step on the full training set
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Forward propagation with the updated weights
        z1 = np.dot(data_train, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, w2) + b2
        a2 = softmax(z2)
        predicted_labels = np.argmax(a2, axis=1) # the predicted labels
        # Compute the accuracy on the training set
        train_accuracy = accuracy(labels_train, predicted_labels)
        train_accuracies.append(train_accuracy)
    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
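    """Evaluate the trained network on the test set and return the test accuracy."""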
    # Forward propagation on the test set
    a0 = data_test
    z1 = np.dot(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, w2) + b2
    a2 = softmax(z2) # class probabilities of the output layer
    predicted_labels_test = np.argmax(a2, axis=1)
    # Compute the accuracy on the test set
    test_accuracy = np.mean(predicted_labels_test == labels_test)
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
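    """Initialize a network with d_h hidden neurons, train it, and evaluate it on the test set.

    Returns the list of per-epoch training accuracies and the final test accuracy.
    """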
d_in = data_train.shape[1]
d_out = len(np.unique(labels_train))
# Initialize weights and biases
w1 = np.random.randn(d_in, d_h)
b1 = np.zeros((1, d_h))
w2 = np.random.randn(d_h, d_out)
b2 = np.zeros((1, d_out))
# Train MLP
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
# Test MLP
final_test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return train_accuracies, final_test_accuracy
if __name__ == "__main__":
# Create train and test datasets
data, labels = read_cifar(r"C:\Users\etulyon1\OneDrive\Desktop\ECL\Apprentissage profond & Intelligence Artificielle\BE1\image-classification\data\cifar-10-batches-py")
    split_factor = 0.9
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
    # Define the network hyper-parameters and train it
    d_h = 64
    learning_rate = 0.1
    num_epoch = 100
    train_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
    print("The accuracy of the network on the test dataset is " + str(100 * final_accuracy) + "%")
    # Plot the evolution of the training accuracy over the epochs
    plt.plot(train_accuracies)
    plt.xlabel("Number of training epochs")
    plt.ylabel("Training accuracy")
    plt.title("Training accuracy of the MLP as a function of the number of epochs")
plt.show()