import numpy as np
import matplotlib.pyplot as plt
from read_cifar import read_cifar_batch, read_cifar, split_dataset
N = 30 # number of samples in the toy dataset
d_in = 3 # input dimension
d_h = 3 # number of neurons in the hidden layer
d_out = 2 # output dimension (number of neurons of the output layer)
# Random initialization of the network weights and biases
w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
b1 = np.zeros((1, d_h)) # first layer biases
w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
b2 = np.zeros((1, d_out)) # second layer biases
data = np.random.rand(N, d_in) # random input data
targets = np.random.rand(N, d_out) # random targets
def sigmoid(x):
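    """Sigmoid activation function; the input is clipped to avoid overflow in np.exp."""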
return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
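    """Perform one gradient-descent step of the two-layer network using the MSE loss.

    Returns the updated weights and biases (w1, b1, w2, b2) and the loss value.
    """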
# Forward pass
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = sigmoid(z2) # output of the output layer (sigmoid activation function)
predictions = a2 # the predicted values are the outputs of the output layer
    # Compute loss (MSE)
    N = data.shape[0] # number of samples in the batch
    loss = np.mean(np.square(predictions - targets))
    print(loss)
    # Backpropagation
    dC_da2 = 2 * (predictions - targets) / N # elementwise gradient of the loss w.r.t. the output activations
dC_dz2 = dC_da2 * predictions * (1 - predictions)
dC_dw2 = np.matmul(a1.T, dC_dz2)
dC_db2 = np.sum(dC_dz2, axis=0)
dC_da1 = np.matmul(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.matmul(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0)
# Update weights and biases
w2 = w2 - learning_rate * dC_dw2
b2 = b2 - learning_rate * dC_db2
w1 = w1 - learning_rate * dC_dw1
b1 = b1 - learning_rate * dC_db1
return w1, b1, w2, b2, loss
# Convert a vector of integer labels into a one-hot matrix
def one_hot(labels):
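    """Encode a vector of integer labels into a (n_samples, n_classes) one-hot matrix."""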
n_samples = len(labels)
n_unique = len(np.unique(labels))
one_hot_matrix = np.zeros((n_samples, n_unique))
one_hot_matrix[np.arange(n_samples), labels] = 1
return one_hot_matrix
def softmax(z):
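    """Row-wise softmax: convert each row of scores into a probability distribution."""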
    # Shift by the row-wise maximum for numerical stability, then normalize each row
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
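    """Perform one gradient-descent step using a softmax output layer and the cross-entropy loss.

    Returns the updated weights and biases (w1, b1, w2, b2) and the loss value.
    """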
a0 = data # the data are the input of the first layer
z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
z2 = np.matmul(a1, w2) + b2 # input of the output layer
a2 = softmax(z2) #Softmax activation layer
predict = a2 # the predicted values are the outputs of the output layer
    targets = one_hot(labels_train) # one-hot encode the integer labels
    N = data.shape[0] # number of samples in the batch
    # Compute the loss (cross-entropy averaged over the batch, with a small epsilon to avoid log(0))
    loss = -np.mean(np.sum(targets * np.log(predict + 1e-9), axis=1))
    # Backpropagation
    # With a softmax output and a cross-entropy loss, the gradient w.r.t. z2 simplifies to (a2 - targets) / N
    dC_dz2 = (predict - targets) / N
    dC_dw2 = np.dot(a1.T, dC_dz2)
    dC_db2 = np.sum(dC_dz2, axis=0)
dC_da1 = np.dot(dC_dz2, w2.T)
dC_dz1 = dC_da1 * a1 * (1 - a1)
dC_dw1 = np.dot(a0.T, dC_dz1)
dC_db1 = np.sum(dC_dz1, axis=0)
# Update weights and biases
w2 = w2 - learning_rate * dC_dw2
b2 = b2 - learning_rate * dC_db2
w1 = w1 - learning_rate * dC_dw1
b1 = b1 - learning_rate * dC_db1
return w1, b1, w2, b2, loss
def accuracy(labels, predictions):
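    """Fraction of predictions that match the ground-truth labels."""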
return np.mean(labels == predictions)
def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch):
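    """Train the network for num_epoch epochs and record the training accuracy after each epoch.

    Returns the trained weights and biases and the list of per-epoch training accuracies.
    """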
    train_accuracies = []
    for i in range(num_epoch):
        # Perform one gradient-descent step on the full training set
        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
        # Forward propagation with the updated weights
        z1 = np.dot(data_train, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, w2) + b2
        a2 = softmax(z2)
        predicted_labels = np.argmax(a2, axis=1) # the predicted labels
        # Compute the accuracy on the training set
        train_accuracy = accuracy(labels_train, predicted_labels)
        train_accuracies.append(train_accuracy)
    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test):
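    """Evaluate the trained network on the test set and return the test accuracy."""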
    # Forward propagation on the test set
    a0 = data_test
    z1 = np.dot(a0, w1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, w2) + b2
    a2 = softmax(z2) # class probabilities of the output layer
    predicted_labels_test = np.argmax(a2, axis=1)
    # Compute the accuracy on the test set
    test_accuracy = np.mean(predicted_labels_test == labels_test)
    return test_accuracy
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs):
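    """Initialize a network with d_h hidden neurons, train it, and evaluate it on the test set.

    Returns the list of per-epoch training accuracies and the final test accuracy.
    """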
d_in = data_train.shape[1]
d_out = len(np.unique(labels_train))
# Initialize weights and biases
w1 = np.random.randn(d_in, d_h)
b1 = np.zeros((1, d_h))
w2 = np.random.randn(d_h, d_out)
b2 = np.zeros((1, d_out))
# Train MLP
w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
# Test MLP
final_test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
return train_accuracies, final_test_accuracy
if __name__ == "__main__":
# Create train and test datasets
data, labels = read_cifar(r"C:\Users\etulyon1\OneDrive\Desktop\ECL\Apprentissage profond & Intelligence Artificielle\BE1\image-classification\data\cifar-10-batches-py")
    split_factor = 0.9
    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split_factor)
    # Define the network hyper-parameters and train it
    d_h = 64
    learning_rate = 0.1
    num_epoch = 100
    train_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
    print("The accuracy of the network on the test dataset is " + str(100 * final_accuracy) + "%")
    # Plot the evolution of the training accuracy over the epochs
    plt.plot(train_accuracies)
    plt.xlabel("Number of training epochs")
    plt.ylabel("Training accuracy")
    plt.title("Training accuracy of the MLP as a function of the number of epochs")
plt.show()