mlp.py
    import numpy as np
    import matplotlib.pyplot as plt
    import math
    import random
    from read_cifar import *
    # We use the sigmoid activation function
    def sigmoid(x):
        return 1/(1+np.exp(-x))
    
    # Its derivative is needed for the gradient computation
    def derivation(x):
        deriv_sigmoid = sigmoid(x)*(1-sigmoid(x))
        return deriv_sigmoid
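    
    # Illustrative note (added, not part of the original file): the sigmoid derivative
    # peaks at x = 0, where sigmoid(0) = 0.5 and derivation(0) = 0.5 * (1 - 0.5) = 0.25.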
    
    def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate):
        # This function performs one gradient descent step using the MSE loss
        # w1, b1, w2 and b2 -- the weights and biases of the network,
        # data -- a matrix of shape (batch_size x d_in)
        # targets -- a matrix of shape (batch_size x d_out)
        # learning_rate -- the learning rate
        A0=data
        A1=sigmoid(np.matmul(A0, w1) + b1)
        A2=sigmoid(np.matmul(A1,w2) + b2)
        # Let us calculate the partial derivatives, layer 2 first
        D_A2=2*(A2-targets)/data.shape[0]          # gradient of the MSE loss, averaged over the batch
        D_Z2=D_A2*A2*(1-A2)                        # elementwise sigmoid derivative
        D_W2=np.matmul(A1.T,D_Z2)
        D_B2=np.sum(D_Z2,axis=0,keepdims=True)     # sum over the batch to match the shape of b2
        # Then layer 1
        D_A1=np.matmul(D_Z2,w2.T)
        D_Z1=D_A1*A1*(1-A1)                        # elementwise sigmoid derivative
        D_B1=np.sum(D_Z1,axis=0,keepdims=True)
        D_W1=np.matmul(A0.T,D_Z1)
        # Gradient descent update of the parameters
        w1=w1-learning_rate*D_W1
        w2=w2-learning_rate*D_W2
        b1=b1-learning_rate*D_B1
        b2=b2-learning_rate*D_B2
        # Forward pass with the updated parameters
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = sigmoid(G2)
        predictions = C2
    
        # Compute loss (MSE)
        loss = np.mean(np.square(predictions - targets))
    
        return(w1,b1,w2,b2,loss)
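    
    # Usage sketch (illustrative, with assumed toy dimensions): for d_in = 4, d_h = 8,
    # d_out = 3 and a batch of 5 samples, the expected shapes are
    #   w1: (4, 8), b1: (1, 8), w2: (8, 3), b2: (1, 3), data: (5, 4), targets: (5, 3)
    # and one step returns the updated parameters plus the scalar MSE loss:
    #   w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)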
    
    def one_hot(D_array):
        # This function converts an array of integer labels to its one-hot encoding
        n=D_array.shape[0]
        o_h_matrix = np.zeros((D_array.shape[0],int(np.max(D_array)+1)))
        for i in range(0,n):
            o_h_matrix[i,int(D_array[i])]=1
        return o_h_matrix
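    
    # Example (illustrative): one_hot(np.array([0, 2, 1])) returns
    #   [[1., 0., 0.],
    #    [0., 0., 1.],
    #    [0., 1., 0.]]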
    
    def softmax(x):
        # The softmax activation function (shifted by the row maximum for numerical stability)
        exp_x=np.exp(x - np.max(x, axis=1, keepdims=True))
        func=exp_x/exp_x.sum(axis=1, keepdims=True)
        return func
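    
    # Example (illustrative): softmax(np.array([[1.0, 2.0, 3.0]])) is roughly
    # [[0.090, 0.245, 0.665]]; every row of the output sums to 1.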
    
    def learn_once_cross_entropy(w1,b1,w2,b2,data,targets,learning_rate):
        # This function performs one gradient descent step using a cross-entropy loss (softmax output, one-hot targets)
        A0=data
        Targets=one_hot(targets)
        A1=sigmoid(np.matmul(A0, w1) + b1)
        A2=softmax(np.matmul(A1,w2) + b2)
        # Let us calculate the partial derivatives, layer 2 first
        D_Z2=(A2-Targets)/data.shape[0]            # softmax + cross-entropy gradient, averaged over the batch
        D_W2=np.matmul(A1.T,D_Z2)
        D_B2=np.sum(D_Z2,axis=0,keepdims=True)     # sum over the batch to match the shape of b2
        # Then layer 1
        D_A1=np.matmul(D_Z2,w2.T)
        D_Z1=D_A1*A1*(1-A1)                        # elementwise sigmoid derivative
        D_B1=np.sum(D_Z1,axis=0,keepdims=True)
        D_W1=np.matmul(A0.T,D_Z1)
        # Gradient descent update of the parameters
        w1=w1-learning_rate*D_W1
        w2=w2-learning_rate*D_W2
        b1=b1-learning_rate*D_B1
        b2=b2-learning_rate*D_B2
        # Forward pass with the updated parameters
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = softmax(G2)
        #Cross entropy loss
        loss = -np.sum(np.multiply(Targets,np.log(C2)))/float(C2.shape[0])
        return (w1,b1,w2,b2,loss)
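    
    # Usage sketch (illustrative): unlike learn_once_mse, targets here is a 1-D array of
    # integer class labels of shape (batch_size,), e.g. values in {0, ..., 9}; it is
    # one-hot encoded internally before the gradient step:
    #   w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data, labels, 0.1)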
    
    def train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch):
        # This function trains the network for num_epoch epochs and returns the training accuracy (in %) after each epoch
        train_accuracies=[]
        for i in range(0,num_epoch):
            (w1,b1,w2,b2,loss)=learn_once_cross_entropy(w1,b1,w2,b2,data_train,labels_train,learning_rate)
            # forward pass in order to determine the accuracy
            A0=data_train
            G1 = np.matmul(A0, w1) + b1
            C1 = sigmoid(G1)
            G2 = np.matmul(C1, w2) + b2
            C2 = softmax(G2)
            predictions = np.argmax(C2,axis=1)
            acc=(np.sum(predictions == labels_train)/predictions.shape[0])*100
            train_accuracies.append(acc)
        return (w1,w2,b1,b2,train_accuracies)
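    
    # Usage sketch (illustrative): note that the returned tuple is ordered
    # (w1, w2, b1, b2, train_accuracies), which differs from the argument order:
    #   w1, w2, b1, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, 0.1, 100)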
    
    def test_mlp(w1,b1,w2,b2,data_test,labels_test):
        # This function evaluates the trained network on the test set and returns its accuracy (in %)
        # First: predict the classes
        A0=data_test
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = softmax(G2)
        # the predicted classes
        predictions = np.argmax(C2,axis=1)
        # The accuracy of the predictions
        test_accuracy = (np.sum(predictions == labels_test)/predictions.shape[0])*100
        return test_accuracy
    
    def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
        # This function trains an MLP classifier and returns the training accuracies across epochs
        # (a list of floats) together with the final testing accuracy (a float)
        d_in = data_train.shape[1]
        d_out = 10
        w1 = 2 * np.random.rand(d_in, d_h) - 1
        b1 = np.zeros((1, d_h))
        w2 = 2 * np.random.rand(d_h, d_out) - 1
        b2 = np.zeros((1, d_out))
        # training
        (w1,w2,b1,b2,train_accuracies)=train_mlp(w1,b1,w2,b2,data_train,labels_train,learning_rate,num_epoch)
        # Testing
        final_accuracy=test_mlp(w1,b1,w2,b2,data_test,labels_test)
        return train_accuracies, final_accuracy
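    
    if __name__ == "__main__":
        # Minimal smoke test (illustrative sketch, not part of the original file).
        # It uses random data with CIFAR-like dimensions instead of the real dataset,
        # because the loading interface of read_cifar is not shown in this file.
        np.random.seed(0)
        data_train = np.random.rand(200, 3072)
        labels_train = np.random.randint(0, 10, 200)
        data_test = np.random.rand(50, 3072)
        labels_test = np.random.randint(0, 10, 50)
        train_accuracies, final_accuracy = run_mlp_training(data_train, labels_train,
                                                            data_test, labels_test,
                                                            d_h=64, learning_rate=0.1, num_epoch=20)
        print("Final test accuracy: %.2f%%" % final_accuracy)
        # Plot the evolution of the training accuracy across epochs
        plt.plot(train_accuracies)
        plt.xlabel("epoch")
        plt.ylabel("train accuracy (%)")
        plt.show()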