    Image classification

    Corentin MASSALA

    Prepare the CIFAR dataset

    All the code can be found in the Python file read_cifar.py.

    2-

    def read_cifar_batch(file):
        # Load one pickled CIFAR batch and return the images and labels as numpy arrays
        with open(file, 'rb') as fo:
            batch = pickle.load(fo, encoding='bytes')
        return np.array(batch[b'data']).astype('float32'), np.array(batch[b'labels']).astype('int64')
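    As a quick sanity check, one CIFAR-10 batch contains 10000 images flattened to 3072 values (32x32x3). A minimal usage sketch, assuming the same data directory used later in bench_knn:

    data, labels = read_cifar_batch('image-classification/data/cifar-10-batches-py/data_batch_1')
    print(data.shape, data.dtype)      # expected: (10000, 3072) float32
    print(labels.shape, labels.dtype)  # expected: (10000,) int64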

    3-

    def read_cifar(path):
        data = []
        labels = []

        # Add the 5 training batches
        for i in range(1, 6):
            data_temp, labels_temp = read_cifar_batch(f'{path}/data_batch_{i}')
            data.append(data_temp)
            labels.append(labels_temp)

        # Add the test batch
        data_temp, labels_temp = read_cifar_batch(f'{path}/test_batch')
        data.append(data_temp)
        labels.append(labels_temp)

        # Concatenate all the batches to create a single big one
        data = np.concatenate(data, axis=0)
        labels = np.concatenate(labels, axis=0)

        return data, labels
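    Concatenating the five training batches and the test batch gives the full CIFAR-10 dataset. A small check, assuming the same data directory as above:

    data, labels = read_cifar('image-classification/data/cifar-10-batches-py')
    print(data.shape)    # expected: (60000, 3072)
    print(labels.shape)  # expected: (60000,)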

    4-

    To split the dataset, we use the train_test_split function from the scikit-learn library.

    def split_dataset(data, labels, split):
        # split is the fraction of the dataset kept for training
        X_train, X_test, y_train, y_test = train_test_split(
            data, labels, test_size=(1 - split), random_state=0)
        return X_train, X_test, y_train, y_test
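    For example, with split=0.9, 90% of the images go to the training set and 10% to the test set (random_state=0 makes the split reproducible):

    X_train, X_test, y_train, y_test = split_dataset(data, labels, 0.9)
    print(X_train.shape, X_test.shape)  # expected: (54000, 3072) (6000, 3072)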

    K-nearest neighbors

    All the code can be found in the Python file knn.py.

    1-

    def distance_matrix(matrix1, matrix2):
        # matrix1 is X_test and matrix2 is X_train, in that order
        sum_of_squares_matrix1 = np.sum(np.square(matrix1), axis=1, keepdims=True)  # ||a||^2 as a column vector
        sum_of_squares_matrix2 = np.sum(np.square(matrix2), axis=1, keepdims=True)  # ||b||^2 as a column vector

        dot_product = np.dot(matrix1, matrix2.T)  # a . b (matrix multiplication)

        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, then take the square root
        dists = np.sqrt(sum_of_squares_matrix1 + sum_of_squares_matrix2.T - 2 * dot_product)
        return dists
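    The vectorized formula relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b. A small sketch on toy arrays (the values are illustrative) to check it against an explicit double loop:

    a = np.random.randn(4, 3)   # 4 "test" vectors
    b = np.random.randn(5, 3)   # 5 "train" vectors

    naive = np.zeros((4, 5))
    for i in range(4):
        for j in range(5):
            naive[i, j] = np.sqrt(np.sum((a[i] - b[j]) ** 2))

    print(np.allclose(distance_matrix(a, b), naive))  # expected: True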

    2-

    def knn_predict(dists, labels_train, k):
        output = []
        # Loop over all the test images
        for i in range(len(dists)):
            # Initialize the vote count for each of the 10 classes
            res = [0] * 10
            # Get the indices of the k closest training images
            labels_close = np.argsort(dists[i])[:k]
            for label in labels_close:
                # Add one vote to the class of this neighbor
                res[labels_train[label]] += 1
            # Pick the class with the most votes
            # Careful with the logic here: if two or more classes tie, argmax returns the first maximum encountered
            label_temp = np.argmax(res)
            output.append(label_temp)
        return np.array(output)
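    A tiny hand-made example (hypothetical values) makes the voting logic visible: with k=3, the test image below has two neighbors of class 1 and one of class 0, so class 1 wins.

    dists = np.array([[0.1, 0.2, 0.3, 0.9]])   # distances from 1 test image to 4 train images
    labels_train = np.array([1, 1, 0, 2])
    print(knn_predict(dists, labels_train, 3))  # expected: [1]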

    3-

    def evaluate_knn(data_train, labels_train, data_test, labels_tests, k):
        dists = distance_matrix(data_test, data_train)
        result_test = knn_predict(dists, labels_train, k)

        # Accuracy: fraction of test images whose predicted label matches the true one
        N = labels_tests.shape[0]
        accuracy = (labels_tests == result_test).sum() / N
        return accuracy

    4-

    def bench_knn():

        # Odd values of k from 1 to 19
        k_indices = [i for i in range(20) if i % 2 != 0]
        accuracies = []

        # Load the data
        data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
        X_train, X_test, y_train, y_test = read_cifar.split_dataset(data, labels, 0.9)

        # Loop over the k_indices to get all the accuracies
        for k in k_indices:
            accuracy = evaluate_knn(X_train, y_train, X_test, y_test, k)
            accuracies.append(accuracy)

        # Save and show the graph of accuracies (save before show, otherwise the saved figure may be empty)
        fig = plt.figure()
        plt.plot(k_indices, accuracies)
        plt.title("Accuracy as a function of k")
        plt.savefig('image-classification/results/knn_batch_1.png')
        plt.show()
        plt.close(fig)

    Here is the graph of accuracy vs. k for the whole CIFAR-10 dataset with a split factor of 0.9:

    (figure: results/knn_batch_1.png, accuracy as a function of k)

    Here we can conclude that the best k is 9 (if we do not count k = 1), with an accuracy of about 35%.

    Artificial Neural Network

    Math Theory

    Here are all the answers to the backpropagation theory questions.

    The answers to questions 1 through 9 are provided as images in the repository.

    Coding part

    All the code can be found in the Python file mlp.py.

    10-

    def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):

        N_out = len(targets)  # number of training examples

        # Forward pass
        a0 = data  # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = sigmoid(z2)  # output of the output layer (sigmoid activation function)
        predictions = a2  # the predicted values are the outputs of the output layer

        # Compute loss (MSE)
        loss = np.mean(np.square(predictions - targets))
        print(f'loss: {loss}')

        # Backpropagation
        delta_a2 = 2 / N_out * (a2 - targets)
        delta_z2 = delta_a2 * (a2 * (1 - a2))
        delta_w2 = np.dot(a1.T, delta_z2)
        delta_b2 = delta_z2

        delta_a1 = np.dot(delta_z2, w2.T)
        delta_z1 = delta_a1 * (a1 * (1 - a1))
        delta_w1 = np.dot(a0.T, delta_z1)
        delta_b1 = delta_z1

        # Update weights and biases
        w2 -= learning_rate * delta_w2
        b2 -= learning_rate * np.sum(delta_b2, axis=0, keepdims=True)

        w1 -= learning_rate * delta_w1
        b1 -= learning_rate * np.sum(delta_b1, axis=0, keepdims=True)

        return w1, b1, w2, b2, loss
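    A minimal usage sketch on random toy data (the dimensions and learning rate below are illustrative, not taken from the assignment); the printed loss should usually decrease from one call to the next:

    N, d_in, d_h, d_out = 30, 3, 10, 2   # toy dimensions, chosen arbitrarily
    data = np.random.rand(N, d_in)       # random input matrix
    targets = np.random.rand(N, d_out)   # random target matrix

    w1, b1 = 2 * np.random.rand(d_in, d_h) - 1, np.zeros((1, d_h))     # first layer weights and biases
    w2, b2 = 2 * np.random.rand(d_h, d_out) - 1, np.zeros((1, d_out))  # second layer weights and biases

    for _ in range(5):
        w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)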

    11-

    def one_hot(labels):
        # num_classes = np.max(labels) + 1 would also work; we hard-code it here
        num_classes = 10
        one_hot_matrix = np.eye(num_classes)[labels]
        return one_hot_matrix
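    For example, with the 10 classes hard-coded above:

    print(one_hot(np.array([0, 3])))
    # expected:
    # [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
    #  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]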

    12-

    The cross_entropy_loss is:

    def cross_entropy_loss(y_pred, y_true):
        loss = -np.sum(y_true * np.log(y_pred)) / len(y_pred)
        return loss
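    As a small numerical check: for two samples with predicted probabilities 0.8 and 0.5 on their true classes, the loss should be -(log 0.8 + log 0.5) / 2, which is about 0.458:

    y_pred = np.array([[0.8, 0.1, 0.1],
                       [0.25, 0.5, 0.25]])
    y_true = np.array([[1, 0, 0],
                       [0, 1, 0]])
    print(cross_entropy_loss(y_pred, y_true))  # expected: about 0.458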

    The new learning function is:

    def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
    
        N_out = len(labels_train) #number of training examples
    
        # Forward pass
        a0 = data # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
        predictions = a2  # the predicted values are the outputs of the output layer
    
    
        # Compute loss (cross-entropy loss)
        y_true_one_hot = one_hot(labels_train)
        loss = cross_entropy_loss(predictions, y_true_one_hot)
    
    
        # Backpropagation
        delta_z2 = (a2 - y_true_one_hot) 
        delta_w2 = np.dot(a1.T, delta_z2) / N_out # We divide by the sample size to have an average on the error and avoid big gradient jumps
        delta_b2 = delta_z2 / N_out
    
    
        delta_a1 = np.dot(delta_z2, w2.T)
        delta_z1 = delta_a1 * (a1 * (1 - a1))
        delta_w1 = np.dot(a0.T, delta_z1) / N_out
        delta_b1 = delta_z1 / N_out
    
        # Update weights and biases
        w2 -= learning_rate * delta_w2
        b2 -= learning_rate * np.sum(delta_b2, axis = 0, keepdims = True)
    
        w1 -= learning_rate * delta_w1
        b1 -= learning_rate * np.sum(delta_b1, axis = 0, keepdims = True)
    
        return w1, b1, w2, b2, loss
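    The step delta_z2 = (a2 - y_true_one_hot) is the usual simplification of the softmax plus cross-entropy gradient: for one sample, d(loss)/d(z2) = softmax(z2) - y. A short finite-difference sketch to verify it; softmax_stable is redefined here only so the snippet is self-contained, and is assumed to match the one in mlp.py:

    def softmax_stable(x):
        # numerically stable softmax, applied row-wise (assumed to match mlp.py)
        e = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e / np.sum(e, axis=1, keepdims=True)

    rng = np.random.default_rng(0)
    z2 = rng.normal(size=(1, 10))   # one random logit vector
    y = np.eye(10)[[3]]             # one-hot target, class 3

    analytic = softmax_stable(z2) - y  # gradient used in the backpropagation above

    # central finite differences on loss = -sum(y * log(softmax(z2)))
    eps = 1e-6
    numeric = np.zeros_like(z2)
    for j in range(10):
        zp, zm = z2.copy(), z2.copy()
        zp[0, j] += eps
        zm[0, j] -= eps
        lp = -np.sum(y * np.log(softmax_stable(zp)))
        lm = -np.sum(y * np.log(softmax_stable(zm)))
        numeric[0, j] = (lp - lm) / (2 * eps)

    print(np.allclose(analytic, numeric, atol=1e-5))  # expected: True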

    13-

    def forward(w1, b1, w2, b2, data):
        # Forward pass
        a0 = data # the data are the input of the first layer
        z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
        a1 = sigmoid(z1)  # output of the hidden layer (sigmoid activation function)
        z2 = np.matmul(a1, w2) + b2  # input of the output layer
        a2 = softmax_stable(z2)  # output of the output layer (softmax activation function)
        predictions = a2  # the predicted values are the outputs of the output layer
        return predictions

    def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
        train_accuracies = []
        for epoch in range(num_epoch):
            w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
    
            # Compute accuracy
            predictions = forward(w1, b1, w2, b2, data_train)
            predicted_labels = np.argmax(predictions, axis=1)
            accuracy = np.mean(predicted_labels == labels_train)
            train_accuracies.append(accuracy)
    
            print(f'Epoch {epoch + 1}/{num_epoch}, Loss: {loss:.3f}, Train Accuracy: {accuracy:.2f}')
    
        return w1, b1, w2, b2, train_accuracies

    14-

     
    def test_mlp(w1, b1, w2, b2, data_test, labels_test):

        # Compute accuracy on the test set
        predictions = forward(w1, b1, w2, b2, data_test)
        predicted_labels = np.argmax(predictions, axis=1)
        test_accuracy = np.mean(predicted_labels == labels_test)
        print(f'Test Accuracy: {test_accuracy:.2f}')
        return test_accuracy

    15-

    def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):

        d_in = data_train.shape[1]
        d_out = 10  # we can hard-code it here, or use len(np.unique(labels_train))

        # Random initialisation of the weights and biases
        w1 = np.random.randn(d_in, d_h)
        b1 = np.random.randn(1, d_h)
    
        w2 = np.random.randn(d_h, d_out)
        b2 = np.random.randn(1, d_out)
    
        # Train MLP
        w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    
        # Test MLP
        test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
        return train_accuracies, test_accuracy
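    Finally, a minimal sketch of a full run; the hyperparameter values below are illustrative, and read_cifar is assumed to be imported as in knn.py:

    data, labels = read_cifar.read_cifar('image-classification/data/cifar-10-batches-py')
    data_train, data_test, labels_train, labels_test = read_cifar.split_dataset(data, labels, 0.9)
    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train,
                                                       data_test, labels_test,
                                                       d_h=64, learning_rate=0.1, num_epoch=100)
    print(test_accuracy)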