diff --git a/knn.py b/knn.py
index 1fd7fa48ac3bf08a5583b63d19aa2920f107c265..4dc7d61363961d8093d446d6df5ff8eca607d050 100644
--- a/knn.py
+++ b/knn.py
@@ -52,18 +52,7 @@ def evaluate_knn(data_train, labels_train, data_test, labels_test, k) :
     number_total_prediction = labels_test.shape[0]
     classification_rate = number_true_prediction/number_total_prediction
-    return classification_rate
-
-def plot_accuracy(data_train, labels_train, data_test, labels_test, k_max) :
-    Y = []
-    for k in range(1, k_max+1) :
-        Y += [evaluate_knn(data_train, labels_train, data_test, labels_test, k)]
-    plt.plot(list(range(1, k_max+1)), Y)
-    plt.xlabel('k (Number of Neighbors)')
-    plt.ylabel('Accuracy')
-    plt.savefig('results/knn.png')
-
-
+    return classification_rate
 
 
 if __name__ == "__main__" :
     t1 = time.time()
diff --git a/mlp.py b/mlp.py
index 825ce394b75b7307eeeace901b920186255b6d9c..3613083a96b6b44db7d536acec5482de738c6558 100644
--- a/mlp.py
+++ b/mlp.py
@@ -6,6 +6,10 @@ Created on Fri Oct 27 16:48:16 2023
 """
 
 import numpy as np
+import read_cifar
+import matplotlib.pyplot as plt
+from scipy.special import expit
+from tqdm import tqdm
 
 
 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
@@ -22,9 +26,9 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
     dCdZ2 = dCdA2 * (a2 - a2**2)
     dCdW2 = np.matmul(a1.T, dCdZ2)
     dCdB2 = (1/N) * np.sum(dCdZ2, axis=0, keepdims=True)
-    dCdA1 = np.matmul(dCdZ2, w2.T)
+    dCdA1 = (1/N) * np.matmul(dCdZ2, w2.T)
     dCdZ1 = dCdA1 * (a1 - a1**2)
-    dCdW1 = np.matmul(a0.T, dCdZ1)
+    dCdW1 = (1/N) * np.matmul(a0.T, dCdZ1)
     dCdB1 = (1/N) * np.sum(dCdZ1, axis=0, keepdims=True)
 
     # one gradient descent step
@@ -33,8 +37,132 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate) :
     w2 -= dCdW2 * learning_rate
     b2 -= dCdB2 * learning_rate
 
+    # recompute the forward pass with the updated weights to report the post-step loss
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
+    predictions = a2
+    loss = np.mean(np.square(predictions - targets)) # mean squared error
 
     return w1, b1, w2, b2, loss
+
+
+def one_hot(labels) :
+    # convert an array of integer labels into a one-hot matrix of shape (D, number of classes)
+    D = len(labels)
+    L = max(labels) # labels is an int array, so the largest label gives (number of classes - 1)
+    one_hot_matrix = np.zeros((D, L+1))
+    for k in range(len(labels)) :
+        label = labels[k]
+        one_hot_matrix[k, label] = 1
+    return one_hot_matrix
+
+
+def softmax(x):
+    # row-wise softmax; subtracting the row maximum avoids numerical overflow in the exponential
+    e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
+    return e_x / e_x.sum(axis=-1, keepdims=True)
+
+
+def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate) :
+    y = one_hot(labels_train)
+
+    # forward pass
+    a0 = data # the data are the input of the first layer
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = expit(z1) # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = softmax(z2) # output of the output layer (softmax activation function)
+
+    # softmax predictions and cross-entropy loss
+    predictions = a2 # the predicted values are the outputs of the output layer
+    loss = -np.sum(y * np.log(predictions)) / float(predictions.shape[0]) # cross-entropy loss
+
+    N = data.shape[0]
+
+    # calculation of the partial derivatives of C (softmax + cross-entropy gives dC/dz2 = a2 - y)
+    dCdZ2 = a2 - y
+    dCdW2 = (1/N) * np.matmul(a1.T, dCdZ2)
+    dCdB2 = (1/N) * np.sum(dCdZ2, axis=0, keepdims=True)
+    dCdA1 = np.matmul(dCdZ2, w2.T)
+    dCdZ1 = dCdA1 * (a1 - np.square(a1))
+    dCdW1 = (1/N) * np.matmul(a0.T, dCdZ1)
+    dCdB1 = (1/N) * np.sum(dCdZ1, axis=0, keepdims=True)
+
+    # one gradient descent step
+    w1 -= dCdW1 * learning_rate
+    b1 -= dCdB1 * learning_rate
+    w2 -= dCdW2 * learning_rate
+    b2 -= dCdB2 * learning_rate
+
+    # accuracy calculation
+    predictions_vect = np.argmax(predictions, axis=1) # predicted class = index of the largest score
+    number_true_prediction = np.sum(labels_train == predictions_vect)
+    number_total_prediction = labels_train.shape[0]
+    accuracy = number_true_prediction/number_total_prediction
+
+    return w1, b1, w2, b2, accuracy
+
+
+def train_mlp(w1, b1, w2, b2, data, labels_train, learning_rate, num_epoch) :
+    # run num_epoch gradient descent steps and record the training accuracy after each one
+    train_accuracies = []
+    for k in tqdm(range(num_epoch)) :
+        w1, b1, w2, b2, accuracy = learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate)
+        train_accuracies.append(accuracy)
+
+    return w1, b1, w2, b2, train_accuracies
+
+
+def test_mlp(w1, b1, w2, b2, data_test, labels_test) :
+    # prediction calculation
+    a0 = data_test
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = expit(z1) # output of the hidden layer (sigmoid activation function)
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = softmax(z2) # output of the output layer (softmax activation function)
+    predictions = a2
+    predictions_vect = np.argmax(predictions, axis=1)
+
+    # accuracy calculation
+    number_true_prediction = np.sum(labels_test == predictions_vect)
+    number_total_prediction = len(labels_test)
+    test_accuracy = number_true_prediction/number_total_prediction
+
+    return test_accuracy
+
+
+def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch) :
+    d_out = np.max(labels_train) + 1 # output dimension (number of neurons of the output layer)
+    d_in = data_train.shape[1] # input dimension (size of one flattened image)
+    # random initialization of the network weights and biases
+    w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
+    b1 = np.zeros((1, d_h)) # first layer biases
+    w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
+    b2 = np.zeros((1, d_out)) # second layer biases
+
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
+    test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+    return train_accuracies, test_accuracy
+
+
+if __name__ == "__main__" :
+    # open, read and split the data using the read_cifar script
+    file = "./data/cifar-10-python/"
+    data, labels = read_cifar.read_cifar(file)
+    data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
+
+    # train the MLP
+    d_h = 64
+    learning_rate = 0.1
+    num_epoch = 100
+
+    train_accuracies, test_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epoch)
+
+    # plot the training accuracy and save the figure before showing it
+    plt.figure(figsize = (12,8))
+    plt.plot(np.array(train_accuracies)*100, color='r', label='split factor : 0.9')
+    plt.title("Multilayer Perceptron training accuracy")
+    plt.legend()
+    plt.xlabel('number of epochs (num_epoch)')
+    plt.ylabel('accuracy (%)')
+    plt.savefig('results/mlp.png')
+    plt.show()
\ No newline at end of file
diff --git a/read_cifar.py b/read_cifar.py
index f25324d500f4cdcb148bd6dcb38f9b482cbe749e..3cc5db39208cfa0dc843297e52e0d0659e7ea41f 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -24,7 +24,7 @@ def read_cifar (batch_dir) :
     data_batches = []
     label_batches = []
 
-    for i in range(1,4) :
+    for i in range(1,6) :
         batch_filename = f'data_batch_{i}'
         batch_path = os.path.join(batch_dir, batch_filename)
         data, labels = read_cifar_batch(batch_path)
diff --git a/results/mlp.png b/results/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..0615c2f516317665bcec9b40c91015ff8a58715c
Binary files /dev/null and b/results/mlp.png differ
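
For reference, the row-wise softmax, the one-hot encoding and the softmax/cross-entropy gradient shortcut (dC/dz2 = a2 - y) that learn_once_cross_entropy relies on can be checked in isolation. Below is a minimal sketch, assuming only NumPy; the helper definitions mirror the ones in the patch, and the toy scores and labels are illustrative, not CIFAR-10 data:

    import numpy as np

    def softmax(x):
        # row-wise softmax: subtract the row maximum for numerical stability
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / e_x.sum(axis=-1, keepdims=True)

    def one_hot(labels):
        # one row per sample, one column per class
        m = np.zeros((len(labels), int(np.max(labels)) + 1))
        m[np.arange(len(labels)), labels] = 1
        return m

    z2 = np.array([[2.0, 0.5, -1.0],
                   [0.1, 0.2, 0.3]])   # toy output-layer inputs, shape (N, d_out)
    labels = np.array([0, 2])          # toy integer labels

    a2 = softmax(z2)                   # each row sums to 1
    y = one_hot(labels)                # shape (N, d_out)
    dCdZ2 = a2 - y                     # per-sample gradient of the cross-entropy w.r.t. z2
    print(a2.sum(axis=1))              # [1. 1.]
    print(dCdZ2.shape)                 # (2, 3)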