import numpy as np
import matplotlib.pyplot as plt
from read_cifar import *
# We are using the sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


# We also need the derivative of the sigmoid to compute the gradient
def derivation(x):
    deriv_sigmoid = sigmoid(x) * (1 - sigmoid(x))
    return deriv_sigmoid
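
# Quick sanity check (toy value, purely illustrative): the sigmoid derivative
# sigma(x) * (1 - sigma(x)) peaks at 0.25 for x = 0. These inline checks only
# run when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    assert np.isclose(derivation(0.0), 0.25)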
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
    # This function performs one gradient descent step on the MSE loss.
    # w1, b1, w2 and b2 -- the weights and biases of the network
    # data -- a matrix of shape (batch_size x d_in)
    # targets -- a matrix of shape (batch_size x d_out)
    # learning_rate -- the learning rate
    N = data.shape[0]
    # Forward pass
    A0 = data
    A1 = sigmoid(np.matmul(A0, w1) + b1)
    A2 = sigmoid(np.matmul(A1, w2) + b2)
    # Let's calculate the partial derivatives
    # Layer 2
    # d(MSE)/d(A2) up to a constant factor; dividing by the batch size keeps
    # the step size independent of the batch
    D_A2 = 2 * (A2 - targets) / N
    D_Z2 = D_A2 * A2 * (1 - A2)  # elementwise sigmoid derivative
    D_W2 = np.matmul(A1.T, D_Z2)
    D_B2 = np.sum(D_Z2, axis=0, keepdims=True)
    # Layer 1
    D_A1 = np.matmul(D_Z2, w2.T)
    D_Z1 = D_A1 * A1 * (1 - A1)
    D_W1 = np.matmul(A0.T, D_Z1)
    D_B1 = np.sum(D_Z1, axis=0, keepdims=True)
    # The gradient descent update
    w1 = w1 - learning_rate * D_W1
    w2 = w2 - learning_rate * D_W2
    b1 = b1 - learning_rate * D_B1
    b2 = b2 - learning_rate * D_B2
    # Forward pass with the updated parameters
    G1 = np.matmul(A0, w1) + b1
    C1 = sigmoid(G1)
    G2 = np.matmul(C1, w2) + b2
    C2 = sigmoid(G2)
    predictions = C2
    # Compute loss (MSE)
    loss = np.mean(np.square(predictions - targets))
    return (w1, b1, w2, b2, loss)
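
# Toy smoke test for one MSE step (all shapes below are made up for
# illustration, not taken from the CIFAR pipeline): one call should return
# updated parameters of unchanged shape and a scalar loss.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    _X, _T = rng.random((4, 3)), rng.random((4, 2))
    _w1, _b1 = rng.random((3, 5)), np.zeros((1, 5))
    _w2, _b2 = rng.random((5, 2)), np.zeros((1, 2))
    _w1, _b1, _w2, _b2, _loss = learn_once_mse(_w1, _b1, _w2, _b2, _X, _T, 0.1)
    print("toy MSE loss:", _loss)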
def one_hot(D_array):
    # This function transforms an array of integer labels into its one-hot encoding
    n = D_array.shape[0]
    o_h_matrix = np.zeros((n, int(np.max(D_array) + 1)))
    for i in range(n):
        o_h_matrix[i, int(D_array[i])] = 1
    return o_h_matrix
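
# Minimal usage sketch (the labels below are hypothetical): one_hot maps each
# integer label to a row containing a single 1.
if __name__ == "__main__":
    # Expected encoding (as floats): [[1,0,0],[0,0,1],[0,1,0]]
    print(one_hot(np.array([0, 2, 1])))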
def softmax(x):
    # The softmax activation function, computed row-wise.
    # Subtracting the row maximum keeps np.exp from overflowing on large inputs.
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / exp_x.sum(axis=1, keepdims=True)
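
# Sanity check (toy logits, purely illustrative): every row of the softmax
# output is a probability distribution, so it must sum to 1.
if __name__ == "__main__":
    _probs = softmax(np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]]))
    assert np.allclose(_probs.sum(axis=1), 1.0)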
def learn_once_cross_entropy(w1, b1, w2, b2, data, targets, learning_rate):
    # This function performs one gradient descent step using the cross-entropy loss.
    # targets -- integer class labels, one-hot encoded internally
    N = data.shape[0]
    A0 = data
    Targets = one_hot(targets)
    # Forward pass
    A1 = sigmoid(np.matmul(A0, w1) + b1)
    A2 = softmax(np.matmul(A1, w2) + b2)
    # Let's calculate the partial derivatives
    # Layer 2: with softmax + cross-entropy, dL/dZ2 simplifies to A2 - Targets
    D_Z2 = (A2 - Targets) / N
    D_W2 = np.matmul(A1.T, D_Z2)
    D_B2 = np.sum(D_Z2, axis=0, keepdims=True)
    # Layer 1
    D_A1 = np.matmul(D_Z2, w2.T)
    D_Z1 = D_A1 * A1 * (1 - A1)
    D_W1 = np.matmul(A0.T, D_Z1)
    D_B1 = np.sum(D_Z1, axis=0, keepdims=True)
    # The gradient descent update
    w1 = w1 - learning_rate * D_W1
    w2 = w2 - learning_rate * D_W2
    b1 = b1 - learning_rate * D_B1
    b2 = b2 - learning_rate * D_B2
    # Forward pass with the updated parameters
    G1 = np.matmul(A0, w1) + b1
    C1 = sigmoid(G1)
    G2 = np.matmul(C1, w2) + b2
    C2 = softmax(G2)
    # Cross-entropy loss
    loss = -np.sum(Targets * np.log(C2)) / float(C2.shape[0])
    return (w1, b1, w2, b2, loss)
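
# Toy smoke test for one cross-entropy step (shapes and labels are made up
# for illustration): labels 0 and 1 give a two-class one-hot target, so
# d_out is 2 here.
if __name__ == "__main__":
    rng = np.random.default_rng(1)
    _X = rng.random((4, 3))
    _y = np.array([0, 1, 1, 0])
    _w1, _b1 = rng.random((3, 5)), np.zeros((1, 5))
    _w2, _b2 = rng.random((5, 2)), np.zeros((1, 2))
    _w1, _b1, _w2, _b2, _loss = learn_once_cross_entropy(_w1, _b1, _w2, _b2, _X, _y, 0.1)
    print("toy cross-entropy loss:", _loss)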
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
    # This function trains the network and returns the training accuracy
    # (in percent) after each of the num_epoch epochs.
    train_accuracies = []
    for i in range(num_epoch):
        (w1, b1, w2, b2, loss) = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Forward pass to determine the training accuracy
        A0 = data_train
        G1 = np.matmul(A0, w1) + b1
        C1 = sigmoid(G1)
        G2 = np.matmul(C1, w2) + b2
        C2 = softmax(G2)
        predictions = np.argmax(C2, axis=1)
        acc = (np.sum(predictions == labels_train) / predictions.shape[0]) * 100
        train_accuracies.append(acc)
    return (w1, b1, w2, b2, train_accuracies)
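
# Minimal training sketch on random data (six samples over three classes,
# all made up): the returned list holds one accuracy value per epoch.
if __name__ == "__main__":
    rng = np.random.default_rng(2)
    _X = rng.random((6, 4))
    _y = np.array([0, 1, 2, 0, 1, 2])  # every class appears, so one_hot is 3 wide
    _w1, _b1 = 2 * rng.random((4, 8)) - 1, np.zeros((1, 8))
    _w2, _b2 = 2 * rng.random((8, 3)) - 1, np.zeros((1, 3))
    _w1, _b1, _w2, _b2, _accs = train_mlp(_w1, _b1, _w2, _b2, _X, _y, 0.1, 5)
    print("toy train accuracies (%):", _accs)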
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
    # This function evaluates the trained network on the test set.
    # First: predict the classes with a forward pass
    A0 = data_test
    G1 = np.matmul(A0, w1) + b1
    C1 = sigmoid(G1)
    G2 = np.matmul(C1, w2) + b2
    C2 = softmax(G2)
    # The predicted classes
    predictions = np.argmax(C2, axis=1)
    # The accuracy of the predictions (in percent)
    test_accuracy = (np.sum(predictions == labels_test) / predictions.shape[0]) * 100
    return test_accuracy
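
# Quick evaluation sketch (random weights and data, so the number itself is
# meaningless; this only exercises the shapes expected by test_mlp):
if __name__ == "__main__":
    rng = np.random.default_rng(3)
    _w1, _b1 = rng.random((3, 5)), np.zeros((1, 5))
    _w2, _b2 = rng.random((5, 2)), np.zeros((1, 2))
    print("toy test accuracy (%):",
          test_mlp(_w1, _b1, _w2, _b2, rng.random((6, 3)), np.array([0, 1, 0, 1, 1, 0])))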
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch):
    # This function trains an MLP classifier and returns the training
    # accuracies across epochs as a list of floats and the final testing
    # accuracy as a float.
    d_in = data_train.shape[1]
    d_out = 10
    # Random initialization of the weights in [-1, 1), zero biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1
    b1 = np.zeros((1, d_h))
    w2 = 2 * np.random.rand(d_h, d_out) - 1
    b2 = np.zeros((1, d_out))
    # Training
    (w1, b1, w2, b2, train_accuracies) = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    # Testing
    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, final_accuracy
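
# End-to-end smoke test. Loading real images depends on the read_cifar
# module, whose API is not shown in this file, so random arrays stand in for
# CIFAR data; labels stay in 0..9 and class 9 is forced to appear so the
# one-hot width matches the hard-coded d_out = 10.
if __name__ == "__main__":
    rng = np.random.default_rng(4)
    _Xtr, _ytr = rng.random((50, 8)), rng.integers(0, 10, 50)
    _ytr[0] = 9
    _Xte, _yte = rng.random((10, 8)), rng.integers(0, 10, 10)
    _accs, _final = run_mlp_training(_Xtr, _ytr, _Xte, _yte,
                                     d_h=16, learning_rate=0.1, num_epoch=5)
    print("train accuracies (%):", _accs)
    print("final test accuracy (%):", _final)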