mlp.py
    import numpy as np
    import matplotlib.pyplot as plt
    import math
    import random
    from read_cifar import *
    # We use the sigmoid activation function
    def sigmoid(x):
        return 1/(1+np.exp(-x))
    
    # Its derivative is needed for the gradient computation
    def derivation(x):
        deriv_sigmoid = sigmoid(x)*(1-sigmoid(x))
        return deriv_sigmoid
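    
    # Illustrative note (added, not part of the original file): the sigmoid derivative
    # peaks at x = 0, where sigmoid(0) = 0.5 and derivation(0) = 0.5 * (1 - 0.5) = 0.25.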
    
    def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
        # This function performs one gradient descent step using the MSE loss
        # w1, b1, w2 and b2 -- the weights and biases of the network,
        # data -- a matrix of shape (batch_size x d_in)
        # targets -- a matrix of shape (batch_size x d_out)
        # learning_rate -- the learning rate
        A0=data
        A1=sigmoid(np.matmul(A0, w1) + b1)
        A2=sigmoid(np.matmul(A1,w2) + b2)
        # Let us calculate the partial derivatives, layer 2 first
        D_A2=2*(A2-targets)/data.shape[0]          # gradient of the MSE loss, averaged over the batch
        D_Z2=D_A2*A2*(1-A2)                        # elementwise sigmoid derivative
        D_W2=np.matmul(A1.T,D_Z2)
        D_B2=np.sum(D_Z2,axis=0,keepdims=True)     # sum over the batch to match the shape of b2
        # Then layer 1
        D_A1=np.matmul(D_Z2,w2.T)
        D_Z1=D_A1*A1*(1-A1)                        # elementwise sigmoid derivative
        D_B1=np.sum(D_Z1,axis=0,keepdims=True)
        D_W1=np.matmul(A0.T,D_Z1)
        # Gradient descent update of the parameters
        w1=w1-learning_rate*D_W1
        w2=w2-learning_rate*D_W2
        b1=b1-learning_rate*D_B1
        b2=b2-learning_rate*D_B2
        # Forward pass with the updated parameters
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = sigmoid(G2)
        predictions = C2
    
        # Compute loss (MSE)
        loss = np.mean(np.square(predictions - targets))
    
        return(w1,b1,w2,b2,loss)
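    
    # Usage sketch (illustrative, with assumed toy dimensions): for d_in = 4, d_h = 8,
    # d_out = 3 and a batch of 5 samples, the expected shapes are
    #   w1: (4, 8), b1: (1, 8), w2: (8, 3), b2: (1, 3), data: (5, 4), targets: (5, 3)
    # and one step returns the updated parameters plus the scalar MSE loss:
    #   w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)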
    
    def one_hot(D_array):
        # This function converts an array of integer labels to its one-hot encoding
        n=D_array.shape[0]
        o_h_matrix = np.zeros((D_array.shape[0],int(np.max(D_array)+1)))
        for i in range(0,n):
            o_h_matrix[i,int(D_array[i])]=1
        return o_h_matrix
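    
    # Example (illustrative): one_hot(np.array([0, 2, 1])) returns
    #   [[1., 0., 0.],
    #    [0., 0., 1.],
    #    [0., 1., 0.]]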
    
    def softmax(x):
        # The softmax activation function (shifted by the row maximum for numerical stability)
        exp_x=np.exp(x - np.max(x, axis=1, keepdims=True))
        func=exp_x/exp_x.sum(axis=1, keepdims=True)
        return func
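    
    # Example (illustrative): softmax(np.array([[1.0, 2.0, 3.0]])) is roughly
    # [[0.090, 0.245, 0.665]]; every row of the output sums to 1.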
    
    def learn_once_cross_entropy(w1,b1,w2,b2,data,targets,learning_rate):
        # This function performs one gradient descent step using a cross-entropy loss (softmax output, one-hot targets)
        A0=data
        Targets=one_hot(targets)
        A1=sigmoid(np.matmul(A0, w1) + b1)
        A2=softmax(np.matmul(A1,w2) + b2)
        # Let us calculate the partial derivatives, layer 2 first
        D_Z2=(A2-Targets)/data.shape[0]            # softmax + cross-entropy gradient, averaged over the batch
        D_W2=np.matmul(A1.T,D_Z2)
        D_B2=np.sum(D_Z2,axis=0,keepdims=True)     # sum over the batch to match the shape of b2
        # Then layer 1
        D_A1=np.matmul(D_Z2,w2.T)
        D_Z1=D_A1*A1*(1-A1)                        # elementwise sigmoid derivative
        D_B1=np.sum(D_Z1,axis=0,keepdims=True)
        D_W1=np.matmul(A0.T,D_Z1)
        # Gradient descent update of the parameters
        w1=w1-learning_rate*D_W1
        w2=w2-learning_rate*D_W2
        b1=b1-learning_rate*D_B1
        b2=b2-learning_rate*D_B2
        # Forward pass with the updated parameters
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = softmax(G2)
        #Cross entropy loss
        loss = -np.sum(np.multiply(Targets,np.log(C2)))/float(C2.shape[0])
        return (w1,b1,w2,b2,loss)
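    
    # Usage sketch (illustrative): unlike learn_once_mse, targets here is a 1-D array of
    # integer class labels of shape (batch_size,), e.g. values in {0, ..., 9}; it is
    # one-hot encoded internally before the gradient step:
    #   w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data, labels, 0.1)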
    
    def train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch):
        # This function trains the network for num_epoch epochs and returns the training accuracy (in %) after each epoch
        train_accuracies=[]
        for i in range(0,num_epoch):
            (w1,b1,w2,b2,loss)=learn_once_cross_entropy(w1,b1,w2,b2,data_train,labels_train,learning_rate)
            # forward pass in order to determine the accuracy
            A0=data_train
            G1 = np.matmul(A0, w1) + b1
            C1 = sigmoid(G1)
            G2 = np.matmul(C1, w2) + b2
            C2 = softmax(G2)
            predictions = np.argmax(C2,axis=1)
            acc=(np.sum(predictions == labels_train)/predictions.shape[0])*100
            train_accuracies.append(acc)
        return (w1,w2,b1,b2,train_accuracies)
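    
    # Usage sketch (illustrative): note that the returned tuple is ordered
    # (w1, w2, b1, b2, train_accuracies), which differs from the argument order:
    #   w1, w2, b1, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, 0.1, 100)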
    
    def test_mlp(w1,b1,w2,b2,data_test,labels_test):
        # This function evaluates the trained network on the test set and returns its accuracy (in %)
        # First: predict the classes
        A0=data_test
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = softmax(G2)
        # the predicted classes
        predictions = np.argmax(C2,axis=1)
        # The accuracy of the predictions
        test_accuracy = (np.sum(predictions == labels_test)/predictions.shape[0])*100
        return test_accuracy
    
    def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
        # This function trains an MLP classifier and returns the training accuracies across epochs
        # (a list of floats) together with the final testing accuracy (a float)
        d_in = data_train.shape[1]
        d_out = 10
        w1 = 2 * np.random.rand(d_in, d_h) - 1
        b1 = np.zeros((1, d_h))
        w2 = 2 * np.random.rand(d_h, d_out) - 1
        b2 = np.zeros((1, d_out))
        # training
        (w1,w2,b1,b2,train_accuracies)=train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch)
        # Testing
        final_accuracy=test_mlp(w1,b1,w2,b2,data_test,labels_test)
        return train_accuracies, final_accuracy
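    
    if __name__ == "__main__":
        # Minimal smoke test (illustrative sketch, not part of the original file).
        # It uses random data with CIFAR-like dimensions instead of the real dataset,
        # because the loading interface of read_cifar is not shown in this file.
        np.random.seed(0)
        data_train = np.random.rand(200, 3072)
        labels_train = np.random.randint(0, 10, 200)
        data_test = np.random.rand(50, 3072)
        labels_test = np.random.randint(0, 10, 50)
        train_accuracies, final_accuracy = run_mlp_training(data_train, labels_train,
                                                            data_test, labels_test,
                                                            d_h=64, learning_rate=0.1, num_epoch=20)
        print("Final test accuracy: %.2f%%" % final_accuracy)
        # Plot the evolution of the training accuracy across epochs
        plt.plot(train_accuracies)
        plt.xlabel("epoch")
        plt.ylabel("train accuracy (%)")
        plt.show()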