    Image classification

    Corentin MASSALA

    Prepare the CIFAR dataset

    All the code can be found in the Python file read_cifar.py.

    2-

    import pickle
    import numpy as np

    def read_cifar_batch(file):
        # Load one CIFAR-10 batch file and return (data, labels) as numpy arrays
        with open(file, 'rb') as fo:
            batch = pickle.load(fo, encoding='bytes')
        return np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64')
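
    As a quick sanity check (a hypothetical call, assuming the archive was extracted to data/), each CIFAR-10 batch file holds 10000 images of 32x32x3 = 3072 values:

    data, labels = read_cifar_batch('data/cifar-10-batches-py/data_batch_1')
    print(data.shape)    # (10000, 3072)
    print(labels.shape)  # (10000,)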

    3-

    def read_cifar(path):
        data = []
        labels = []

        # Add the 5 training batches
        for i in range(1, 6):
            data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
            data.append(data_temp)
            labels.append(labels_temp)

        # Add the test batch
        data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
        data.append(data_temp)
        labels.append(labels_temp)

        # Concatenate all the batches into a single one
        data = np.concatenate(data, axis=0)
        labels = np.concatenate(labels, axis=0)

        return data, labels
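
    With the five training batches and the test batch concatenated, the full dataset should come out as 60000 rows (a sketch, assuming the standard CIFAR-10 folder layout):

    data, labels = read_cifar('data/cifar-10-batches-py')
    print(data.shape)    # (60000, 3072)
    print(labels.shape)  # (60000,)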

    4-

    To split the dataset, we use the train_test_split function from the scikit-learn library.

    from sklearn.model_selection import train_test_split

    def split_dataset(data, labels, split):
        X_train, X_test, y_train, y_test = train_test_split(
            data, labels, test_size=(1 - split), random_state=0)

        return X_train, X_test, y_train, y_test
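
    For example, with the split factor of 0.9 used later in this project, 90% of the samples go to training (a sketch on the full dataset loaded above):

    X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.9)
    print(X_train.shape)  # (54000, 3072)
    print(X_test.shape)   # (6000, 3072)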

    K-nearest neighbors

    All the code can be found in the Python file knn.py.

    1-

    def distance_matrix(matrix1, matrix2):
        # Called with X_test first, then X_train, in this order
        sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # ||a||^2 per row
        sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)  # ||b||^2 per row

        dot_product = np.dot(matrix1, matrix2.T)  # a.b for every pair of rows (matrix multiplication)

        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, then take the square root
        dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)
        return dists
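
    A quick way to check the vectorised formula against a naive broadcast computation (a small sketch on random data, not part of the original code):

    A = np.random.rand(5, 3)  # e.g. 5 "test" rows
    B = np.random.rand(4, 3)  # e.g. 4 "train" rows
    naive = np.sqrt(((A[:, None, :] - B[None, :, :]) ** 2).sum(axis=2))
    print(np.allclose(distance_matrix(A, B), naive))  # True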

    2-

    def knn_predict(dists, labels_train, k):
        output = []
        # Loop over all the test images
        for i in range(len(dists)):
            # Initialize a table to count the votes per class
            res = [0] * 10
            # Get the indices of the k closest training samples
            labels_close = np.argsort(dists[i])[:k]
            for label in labels_close:
                # Add one vote to the class of this neighbor
                res[labels_train[label]] += 1
            # Get the class with the most votes
            # Careful with the logic here: if two or more classes are tied,
            # np.argmax returns the first maximum encountered
            label_temp = np.argmax(res)
            output.append(label_temp)
        return np.array(output)
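
    A toy example of the voting (hypothetical values, chosen to show the tie-breaking behaviour mentioned above):

    dists = np.array([[0.1, 0.5, 0.9],
                      [0.8, 0.2, 0.3]])
    labels_train = np.array([3, 7, 7])
    # For the first test row with k=2, classes 3 and 7 each get one vote,
    # so np.argmax keeps the first maximum encountered: class 3
    print(knn_predict(dists, labels_train, 2))  # [3 7]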

    3-

    def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
        dists = distance_matrix(data_test, data_train)
        result_test = knn_predict(dists, labels_train, k)

        # Accuracy: fraction of test labels predicted correctly
        N = labels_tests.shape[0]
        accuracy = (labels_tests == result_test).sum() / N
        return accuracy

    4-

    import matplotlib.pyplot as plt
    import read_cifar

    def bench_knn():

        k_indices = [i for i in range(20) if i % 2 != 0]  # odd values of k from 1 to 19
        accuracies = []

        # Load data
        data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
        X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)

        # Loop over the k values to get all the accuracies
        for k in k_indices:
            accuracy = evaluate_knn(X_train, y_train, X_test, y_test, k)
            accuracies.append(accuracy)

        # Save and show the graph of accuracies
        # (save before plt.show(), otherwise an empty figure may be written out)
        fig = plt.figure()
        plt.plot(k_indices, accuracies)
        plt.title("Accuracy as a function of k")
        plt.savefig('image-classification/results/knn_batch_1.png')
        plt.show()
        plt.close(fig)

    Here is the graph of accuracy as a function of k for the whole CIFAR dataset with a split factor of 0.9:

    (figure: accuracy as a function of k, saved to results/knn_batch_1.png)

    Here we can conclude that the best k is 9 (if we don't use k = 1), with an accuracy of about 35%.

    Artificial Neural Network

    Math Theory

    Here are all the answers to the backpropagation theory questions.

    (answers 1 to 9 are provided as images in the theory folder)

    Coding part

    All the code can be found in the file mlp.py.

    10-

    def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    
        N_out = len(targets) #number of training examples
    
        # Forward pass
        a0 = data # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
        predictions = a2  # the predicted values are the outputs of the output layer
    
        # Compute loss (MSE)
        loss = np.mean(np.square(predictions - targets))
        print(f'loss: {loss}')

        # Backpropagation
        delta_a2 = 2 / N_out * (a2 - targets)  # dL/da2 for the MSE loss
        delta_z2 = delta_a2 * (a2 * (1 - a2))  # chain rule through the sigmoid derivative
        delta_w2 = np.dot(a1.T, delta_z2)      # dL/dw2
        delta_b2 = delta_z2                    # dL/db2 (summed over the batch below)

        delta_a1 = np.dot(delta_z2, w2.T)      # backpropagate into the hidden layer
        delta_z1 = delta_a1 * (a1 * (1 - a1))  # chain rule through the sigmoid derivative
        delta_w1 = np.dot(a0.T, delta_z1)      # dL/dw1
        delta_b1 = delta_z1                    # dL/db1 (summed over the batch below)
    
        # Update weights and biases
        w2 -= learning_rate * delta_w2
        b2 -= learning_rate * np.sum(delta_b2, axis = 0, keepdims = True)
    
        w1 -= learning_rate * delta_w1
        b1 -= learning_rate * np.sum(delta_b1, axis = 0, keepdims = True)
    
        return w1, b1, w2, b2, loss
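
    learn_once_mse relies on a sigmoid helper that is not listed in this README; a minimal version (a sketch), consistent with the a * (1 - a) factor used in the backward pass, would be:

    def sigmoid(z):
        # Logistic function; its derivative is sigmoid(z) * (1 - sigmoid(z))
        return 1 / (1 + np.exp(-z))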

    11-

    def one_hot(labels):
        # num_classes = np.max(labels) + 1; we hard-code it to 10 here
        num_classes = 10
        one_hot_matrix = np.eye(num_classes)[labels]
        return one_hot_matrix
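
    The np.eye trick simply selects one row of the identity matrix per label, for example:

    print(one_hot(np.array([0, 2])))
    # [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
    #  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]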

    12-

    The cross_entropy_loss is:

    def cross_entropy_loss(y_pred, y_true):
        loss = -np.sum(y_true * np.log(y_pred)) / len(y_pred)
        return loss
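
    Note that np.log(y_pred) is undefined when a predicted probability reaches exactly 0; a common guard (an optional variant, not in the original code) is to clip the predictions first:

    def cross_entropy_loss(y_pred, y_true, eps=1e-12):
        # Clip to avoid log(0) when the softmax saturates
        y_pred = np.clip(y_pred, eps, 1.0)
        return -np.sum(y_true * np.log(y_pred)) / len(y_pred)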

    The new learning function is:

    def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    
        N_out = len(labels_train) #number of training examples
    
        # Forward pass
        a0 = data # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
        predictions = a2  # the predicted values are the outputs of the output layer
    
    
        # Compute loss (cross-entropy loss)
        y_true_one_hot = one_hot(labels_train)
        loss = cross_entropy_loss(predictions, y_true_one_hot)
    
    
        # Backpropagation
        delta_z2 = (a2 - y_true_one_hot) 
        delta_w2 = np.dot(a1.T, delta_z2) / N_out  # divide by the batch size to average the error and avoid large gradient steps
        delta_b2 = delta_z2 / N_out
    
    
        delta_a1 = np.dot(delta_z2, w2.T)
        delta_z1 = delta_a1 * (a1 * (1 - a1))
        delta_w1 = np.dot(a0.T, delta_z1) / N_out
        delta_b1 = delta_z1 / N_out
    
        # Update weights and biases
        w2 -= learning_rate * delta_w2
        b2 -= learning_rate * np.sum(delta_b2, axis = 0, keepdims = True)
    
        w1 -= learning_rate * delta_w1
        b1 -= learning_rate * np.sum(delta_b1, axis = 0, keepdims = True)
    
        return w1, b1, w2, b2, loss
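
    Both learn_once_cross_entropy and the forward function below call softmax_stable, which is not listed in this README. A minimal numerically stable sketch subtracts the row-wise maximum before exponentiating; note that it is the softmax combined with the cross-entropy loss that makes the gradient simplify to delta_z2 = a2 - y_true_one_hot:

    def softmax_stable(z):
        # Shifting by the row max leaves the result unchanged but avoids overflow in exp
        e = np.exp(z - np.max(z, axis=1, keepdims=True))
        return e / np.sum(e, axis=1, keepdims=True)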

    13-

    def forward(w1, b1, w2, b2, data):
        # Forward pass
        a0 = data # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
        predictions = a2  # the predicted values are the outputs of the output layer
        return predictions

    def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
        train_accuracies = []
        for epoch in range(num_epoch):
            w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
    
            # Compute accuracy
            predictions = forward(w1, b1, w2, b2, data_train)
            predicted_labels = np.argmax(predictions, axis=1)
            accuracy = np.mean(predicted_labels == labels_train)
            train_accuracies.append(accuracy)
    
            print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
    
        return w1, b1, w2, b2, train_accuracies

    14-

    def test_mlp(w1, b1, w2, b2, data_test, labels_test):

        # Compute accuracy on the test set
        predictions = forward(w1, b1, w2, b2, data_test)
        predicted_labels = np.argmax(predictions, axis=1)
        test_accuracy = np.mean(predicted_labels == labels_test)
        print(f'Test Accuracy: {test_accuracy:.2f}')
        return test_accuracy

    15-

    def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):

        d_in = data_train.shape[1]
        d_out = 10  # hard-coded here; we could also use len(np.unique(labels_train))
    
        #Random initialisation of weights (Xavier initialisation)
        w1 = np.random.randn(d_in, d_h) / np.sqrt(d_in)
        b1 = np.zeros((1, d_h))
        w2 = np.random.randn(d_h, d_out) / np.sqrt(d_h)
        b2 = np.zeros((1, d_out))
    
        # Train MLP
        w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    
        # Test MLP
        test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
        return train_accuracies, test_accuracy
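
    A typical call, matching the experiment described at the end of this README (d_h = 64 is an assumption here, since the hidden layer size is not stated):

    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
    X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)
    train_accuracies, test_accuracy = run_mlp_training(
        X_train, y_train, X_test, y_test, d_h=64, learning_rate=0.1, num_epoch=100)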

    16-

    def plot_graph(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
        # Run MLP training
        train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
        
        # Plot and save the learning accuracy graph
        plt.figure(figsize=(8, 6))
        epochs = np.arange(1, num_epoch + 1)
        plt.plot(epochs, train_accuracies, marker='x', color='b', label='Train Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('MLP Train Accuracy')
        plt.legend()
        plt.grid(True)
        plt.savefig('image-classification/results/mlp.png')
        plt.show()

    (figures: MLP train accuracy over the epochs, saved to results/mlp.png)

    The accuracy increases with each epoch without converging; we could increase the learning rate to speed up training and increase the number of epochs to see what our maximum accuracy would be. For 100 epochs and a learning rate of 0.1 we obtained a test accuracy of 0.13. For 300 epochs and a learning rate of 0.1 the training accuracy increased to 0.15991 and the test accuracy reached 0.155.