Commit 7baefb6f authored by oscarchaufour

mlp finished + result

parent f4d3c48d

@@ -54,17 +54,6 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
     return classification_rate
 
-def plot_accuracy(data_train, labels_train, data_test, labels_test, k_max) :
-    Y = []
-    for k in range(1, k_max+1) :
-        Y += [evaluate_knn(data_train, labels_train, data_test, labels_test, k)]
-    plt.plot(list(range(1, k_max+1)), Y)
-    plt.xlabel('k (Number of Neighbors)')
-    plt.ylabel('Accuracy')
-    plt.savefig('results/knn.png')
-
 if __name__ == "__main__" :
     t1 = time.time()
     # # Example distance matrix, training labels, and k value
...
@@ -6,6 +6,10 @@ Created on Fri Oct 27 16:48:16 2023
 """
 import numpy as np
+import read_cifar
+import matplotlib.pyplot as plt
+from scipy.special import expit
+from tqdm import tqdm
 
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
@@ -22,9 +26,9 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
     dCdZ2 = dCdA2 * (a2 - a2**2)
     dCdW2 = np.matmul(a1.T, dCdZ2)
     dCdB2 = (1/N) * np.sum(dCdZ2, axis=0, keepdims=True)
-    dCdA1 = np.matmul(dCdZ2, w2.T)
+    dCdA1 = (1/N) * np.matmul(dCdZ2, w2.T)
     dCdZ1 = dCdA1 * (a1 - a1**2)
-    dCdW1 = np.matmul(a0.T, dCdZ1)
+    dCdW1 = (1/N) * np.matmul(a0.T, dCdZ1)
     dCdB1 = (1/N) * np.sum(dCdZ1, axis=0, keepdims=True)
 
     # one gradient descent step
@@ -33,8 +37,132 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
     w2 -= dCdW2 * learning_rate
     b2 -= dCdB2 * learning_rate
 
+    # new a2 calculation
+    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1))  # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2  # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2))  # output of the output layer (sigmoid activation function)
+    predictions = a2
+
     loss = np.mean(np.square(predictions - targets))
     return w1, b1, w2, b2, loss
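
As a quick sanity check of learn_once_mse (a sketch added for this writeup, not part of the commit; the shapes and the 0.1 learning rate are arbitrary), the MSE loss should decrease over a few successive calls on a small random batch:

import numpy as np

np.random.seed(0)
N, d_in, d_h, d_out = 16, 32, 8, 4          # illustrative sizes only
data = np.random.rand(N, d_in)
targets = np.random.rand(N, d_out)
w1 = 2 * np.random.rand(d_in, d_h) - 1      # same initialization scheme as run_mlp_training
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, d_out) - 1
b2 = np.zeros((1, d_out))

for _ in range(20):
    w1, b1, w2, b2, loss = learn_once_mse(w1, b1, w2, b2, data, targets, 0.1)
    print(loss)  # expected to decrease from one call to the next
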
def one_hot(labels) :
    D = len(labels)
    L = max(labels)  # labels is an array of integer class indices
    one_hot_matrix = np.zeros((D, L+1))
    for k in range(len(labels)) :
        label = labels[k]
        one_hot_matrix[k, label] = 1
    return one_hot_matrix
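
For reference, a minimal example of the encoding one_hot produces:

labels = np.array([0, 2, 1])
print(one_hot(labels))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
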
def softmax(x):
    # subtract the row-wise maximum to avoid numerical overflow
    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e_x / e_x.sum(axis=-1, keepdims=True)
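
With the row-wise reduction, each sample of a batch is normalized independently (a quick check, not part of the commit):

logits = np.array([[1.0, 2.0, 3.0],
                   [10.0, 10.0, 10.0]])
probas = softmax(logits)
print(probas.sum(axis=1))  # [1. 1.]: every row is a valid probability distribution
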
def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate) :
    y = one_hot(labels_train)
    N = data.shape[0]

    # forward pass
    a0 = data  # the data are the input of the first layer
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = expit(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax(z2)  # output of the output layer (softmax activation function)
    predictions = a2  # the predicted values are the outputs of the output layer

    # cross-entropy loss (computed for monitoring; the function returns the accuracy)
    loss = -np.sum(y * np.log(predictions)) / float(N)

    # calculation of the partial derivatives of C
    dCdZ2 = a2 - y
    dCdW2 = (1/N) * np.matmul(a1.T, dCdZ2)
    dCdB2 = (1/N) * np.sum(dCdZ2, axis=0, keepdims=True)
    dCdA1 = np.matmul(dCdZ2, w2.T)
    dCdZ1 = dCdA1 * (a1 - np.square(a1))
    dCdW1 = (1/N) * np.matmul(a0.T, dCdZ1)
    dCdB1 = (1/N) * np.sum(dCdZ1, axis=0, keepdims=True)

    # one gradient descent step
    w1 -= dCdW1 * learning_rate
    b1 -= dCdB1 * learning_rate
    w2 -= dCdW2 * learning_rate
    b2 -= dCdB2 * learning_rate

    # accuracy calculation
    predictions_vect = np.argmax(predictions, axis=1)
    number_true_prediction = np.sum(labels_train == predictions_vect)
    number_total_prediction = labels_train.shape[0]
    accuracy = number_true_prediction / number_total_prediction
    return w1, b1, w2, b2, accuracy
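
The dCdZ2 = a2 - y shortcut is the standard gradient of the cross-entropy loss with respect to the softmax input, and it can be checked numerically against finite differences. The sketch below is not part of the commit; it assumes one_hot and softmax as defined above, and the sizes are arbitrary.

import numpy as np
from scipy.special import expit

np.random.seed(1)
N, d_in, d_h, n_classes = 5, 4, 3, 3
data = np.random.rand(N, d_in)
y = one_hot(np.random.randint(0, n_classes, size=N))
w1 = 2 * np.random.rand(d_in, d_h) - 1
b1 = np.zeros((1, d_h))
w2 = 2 * np.random.rand(d_h, n_classes) - 1
b2 = np.zeros((1, n_classes))

def cross_entropy(w2_):
    a1 = expit(np.matmul(data, w1) + b1)
    a2 = softmax(np.matmul(a1, w2_) + b2)
    return -np.sum(y * np.log(a2)) / N

# analytic gradient, mirroring learn_once_cross_entropy
a1 = expit(np.matmul(data, w1) + b1)
a2 = softmax(np.matmul(a1, w2) + b2)
dCdW2 = (1 / N) * np.matmul(a1.T, a2 - y)

# central finite differences
eps = 1e-6
numerical = np.zeros_like(w2)
for i in range(w2.shape[0]):
    for j in range(w2.shape[1]):
        w2_plus, w2_minus = w2.copy(), w2.copy()
        w2_plus[i, j] += eps
        w2_minus[i, j] -= eps
        numerical[i, j] = (cross_entropy(w2_plus) - cross_entropy(w2_minus)) / (2 * eps)

print(np.max(np.abs(numerical - dCdW2)))  # should be tiny (on the order of 1e-9)
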
def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch) :
    train_accuracies = []
    for k in tqdm(range(num_epoch)) :
        # each epoch performs one full-batch gradient descent step on the whole training set
        w1, b1, w2, b2, accuracy = learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate)
        train_accuracies.append(accuracy)
    return w1, b1, w2, b2, train_accuracies
def test_mlp(w1, b1, w2, b2, data_test, labels_test) :
    # prediction calculation (forward pass)
    a0 = data_test
    z1 = np.matmul(a0, w1) + b1  # input of the hidden layer
    a1 = expit(z1)  # output of the hidden layer (sigmoid activation function)
    z2 = np.matmul(a1, w2) + b2  # input of the output layer
    a2 = softmax(z2)  # output of the output layer (softmax activation function)
    predictions = a2
    predictions_vect = np.argmax(predictions, axis=1)

    # accuracy calculation
    number_true_prediction = np.sum(labels_test == predictions_vect)
    number_total_prediction = len(labels_test)
    test_accuracy = number_true_prediction / number_total_prediction
    return test_accuracy
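
Note that np.argmax is invariant to the softmax normalization (softmax is monotonic within each row), so test_mlp returns the same accuracy whether argmax is taken on a2 or directly on z2; normalizing only matters when the outputs are read as probabilities. A one-line check:

z2 = np.array([0.2, 1.5, -0.3])
assert np.argmax(z2) == np.argmax(softmax(z2))  # both select class 1
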
def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch) :
    d_out = np.max(labels_train) + 1  # output dimension (number of neurons of the output layer)
    d_in = data_train.shape[1]

    # random initialization of the network weights and biases
    w1 = 2 * np.random.rand(d_in, d_h) - 1  # first layer weights
    b1 = np.zeros((1, d_h))  # first layer biases
    w2 = 2 * np.random.rand(d_h, d_out) - 1  # second layer weights
    b2 = np.zeros((1, d_out))  # second layer biases

    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
    return train_accuracies, test_accuracy
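
Before launching the full CIFAR-10 run below, run_mlp_training can be exercised on a small synthetic dataset to make sure the shapes line up (a sketch, not part of the commit; sizes and hyperparameters are arbitrary):

np.random.seed(2)
data_train = np.random.rand(200, 48)
labels_train = np.random.randint(0, 10, size=200)
data_test = np.random.rand(50, 48)
labels_test = np.random.randint(0, 10, size=50)

train_acc, test_acc = run_mlp_training(data_train, labels_train, data_test, labels_test,
                                       d_h=16, learning_rate=0.1, num_epoch=20)
print(len(train_acc), test_acc)  # 20 per-epoch accuracies, and a test accuracy near chance (~0.1)
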
if __name__ == "__main__" :
    # open, read and split the data using the read_cifar script
    file = "./data/cifar-10-python/"
    data, labels = read_cifar.read_cifar(file)
    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)

    # train the MLP
    d_h = 64
    learning_rate = 0.1
    num_epoch = 100
    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)

    # plot the training accuracy and save the figure before showing it
    plt.figure(figsize=(12, 8))
    plt.plot(np.array(train_accuracies) * 100, color='r', label='split factor : 0.9')
    plt.title("Multilayer Perceptron training accuracy")
    plt.legend()
    plt.xlabel('number of epochs (num_epoch)')
    plt.ylabel('accuracy (%)')
    plt.savefig('results/mlp.png')
    plt.show()
@@ -24,7 +24,7 @@ def read_cifar (batch_dir) :
     data_batches = []
     label_batches = []
-    for i in range(1,4) :
+    for i in range(1,6) :
         batch_filename = f'data_batch_{i}'
         batch_path = os.path.join(batch_dir, batch_filename)
         data, labels = read_cifar_batch(batch_path)
...
results/mlp.png (23.1 KiB)