diff --git a/__pycache__/mlp.cpython-312.pyc b/__pycache__/mlp.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..253dd5416f52e6415215e428a36543d7d410145a
Binary files /dev/null and b/__pycache__/mlp.cpython-312.pyc differ
diff --git a/__pycache__/read_cifar.cpython-312.pyc b/__pycache__/read_cifar.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..09de6b65aa70c4242958f45a2b8fc2d3961e731d
Binary files /dev/null and b/__pycache__/read_cifar.cpython-312.pyc differ
diff --git a/final_run.py b/final_run.py
index b3137465c9752f3a80c8aabc0e3c02c1444b37a9..d1029055c3fc5e07d9447ee089c0b3c5a69af62b 100644
--- a/final_run.py
+++ b/final_run.py
@@ -1,11 +1,14 @@
-from read_cifar import read_cifar, split_dataset
+from read_cifar import read_cifar_batch, split_dataset
 from mlp import run_mlp_training
 import matplotlib.pyplot as plt
 
 if __name__ == "__main__":
-    data, labels = read_cifar()
+    data, labels = read_cifar_batch("data_batch_1")  # test the MLP on a single batch only (otherwise it takes too long)
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.9)
     list_accuracies, final_accuracy = run_mlp_training(data_train, labels_train, data_test, labels_test, d_h=64, learning_rate=0.1, num_epoch=100)
     plt.plot(list_accuracies)
+    plt.xlabel('Number of epochs')
+    plt.ylabel('Accuracy')
+    plt.grid(True)
+    plt.savefig('results/mlp.png')
     plt.show()
-    #plt.savefig("mlp.png")
diff --git a/knn.py b/knn.py
index 6e23aa8d30c897eb1f3546b386f020c623b2d0f4..6d26f13386e473f97b1f70be3bc97c22951cf691 100644
--- a/knn.py
+++ b/knn.py
@@ -1,5 +1,5 @@
 import numpy as np
-from read_cifar import read_cifar, split_dataset
+from read_cifar import split_dataset, read_cifar_batch
 import matplotlib.pyplot as plt
 
 def distance_matrix(matrix_a: np.ndarray, matrix_b: np.ndarray):
@@ -15,7 +15,7 @@ def knn_predict(dists: np.ndarray, labels_train: np.ndarray, k:int):
     labels_predicts = np.zeros(np.size(dists, 0))
     for i in range(np.size(labels_predicts, 0)):
         #On extrait les indices des k valeurs plus petites (des k plus proches voisins)
-        k_neighbors_index = np.argmin(dists[i, :], np.sort(dists[i, :])[:k])
+        k_neighbors_index = np.argpartition(dists[i,:], k)[:k]
         #On compte la classe la plus présente parmi les k voisins les plus proches
         labels_k_neighbors = labels_train[k_neighbors_index]
         #On compte le nombre d'occurence des classes parmis les k
@@ -36,10 +36,14 @@ def evaluate_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.n
     return accuracy
 
 def plot_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.ndarray, labels_test:np.ndarray, n: int):
-    accuracy_vector = np.zeros(n)
+    accuracy_vector = np.zeros(n+1)
     for k in range(1, n+1):
-        accuracy_vector[k] = evaluate_knn(data_train, labels_train, data_test, labels_test)
+        accuracy_vector[k] = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
     plt.plot(accuracy_vector)
+    plt.xlabel('Number of Neighbors')
+    plt.ylabel('Accuracy')
+    plt.grid(True)
+    plt.savefig('results/knn.png')
     plt.show()
     return
 
@@ -48,7 +52,8 @@ def plot_knn(data_train:np.ndarray, labels_train: np.ndarray, data_test:np.ndarr
 
 
 if __name__ == "__main__":
-    data, labels = read_cifar()
+    data, labels = read_cifar_batch("data_batch_1")  # test the KNN on a single batch only (otherwise it takes too long)
     data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split=0.8)
     k = 5 #Nombre de voisins
-    accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
\ No newline at end of file
+    accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+    plot_knn(data_train, labels_train, data_test, labels_test, 20)
\ No newline at end of file
diff --git a/mlp.py b/mlp.py
index f37afc7bf6ed77a31494f808bc670cdb31f4e76d..ae5a8aaadfae8bb9ef158519c72ec19cd6452806 100644
--- a/mlp.py
+++ b/mlp.py
@@ -45,15 +45,21 @@ def learn_once_mse(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarra
 
 def one_hot(label=np.ndarray):
     """
-    Encode une suite d'entier en binaire : encodeur one-hot.
+    Encode the labels by putting a 1 at the index corresponding to the label's class, and 0s elsewhere (one-hot encoding).
     :label: La suite d'entier à encoder.
     :return: la matrice encodée.
     """
-    result = np.zeros((np.size(label, 0), np.size(label, 0)))
-    for i in range(np.size(label, 0)):
-        result[i] = convert_integer_to_binary(label[i], np.size(label, 0))
-    return result
+    num_classes = np.max(label) + 1
+    one_hot_matrix = np.eye(num_classes)[label]
+    return one_hot_matrix
+
+def decode_class(encoded_labels:np.ndarray):
+    """
+    Decode labels encoded with one_hot and return a vector containing the corresponding classes.
+    """
+    return np.argmax(encoded_labels, axis=1)
+
 
 def convert_integer_to_binary(integer, size):
     """
@@ -74,6 +80,11 @@
     return np.array(binary)
 
+
+def softmax(x):
+    return np.exp(x - np.max(x, axis=-1, keepdims=True)) / np.exp(x - np.max(x, axis=-1, keepdims=True)).sum(axis=-1, keepdims=True)  # row-wise softmax, shifted by the row max to avoid overflow
+
+def sigmoid(z):
+    return 1 / (1 + np.exp(-np.clip(z, -30, 30)))  # clip to avoid overflow
 
 def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, data: np.ndarray, labels_train: np.ndarray, learning_rate: np.ndarray):
     """
@@ -90,18 +101,18 @@ def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2:
     # Forward pass
     a0 = data # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = 1 / (1 + np.exp(-z1)) # output of the hidden layer (sigmoid activation function)
+    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = 1 / (1 + np.exp(-z2)) # output of the output layer (sigmoid activation function)
+    a2 = softmax(z2) # output of the output layer (softmax activation function)
 
-    encoded_vector = one_hot(labels_train)
-    dz2 = a2 - encoded_vector
-    dw2 = dz2*a1
-    db2 = dz2
-    da1 = dz2*np.sum(w2, axis=1)
+    encoded_labels_train = one_hot(labels_train)
+    dz2 = a2 - encoded_labels_train
+    dw2 = np.matmul(a1.T, dz2)
+    db2 = np.sum(dz2, axis=0, keepdims=True)
+    da1 = np.matmul(dz2, w2.T)
     dz1 = da1*a1*(1-a1)
-    dw1 = dz1*a0
-    db1 = dz1
+    dw1 = np.matmul(data.T, dz1)
+    db1 = np.sum(dz1, axis=0, keepdims=True)
 
     w1 -= learning_rate*dw1
     w2 -= learning_rate*dw2
@@ -109,7 +120,8 @@ def learn_once_cross_entropy(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2:
     b2 -= learning_rate*db2
 
     m = np.size(data, 0)
-    loss = (-1/m) * np.sum(labels_train * np.log(a2) + (1 - labels_train) * np.log(1 - a2))
+    eps = 10**(-9)
+    loss = (-1/m) * np.sum(encoded_labels_train * np.log(a2 + eps))  # cross-entropy between the one-hot targets and the softmax outputs
 
     return w1, b1, w2, b2, loss
 
@@ -134,18 +146,15 @@ def train_mlp(w1: np.ndarray, b1: np.ndarray, w2: np.ndarray, b2: np.ndarray, da
         # Forward pass
         a0 = data_train
         z1 = np.matmul(a0, w1) + b1
-        a1 = 1 / (1 + np.exp(-z1))
+        a1 = sigmoid(z1)
         z2 = np.matmul(a1, w2) + b2
-        a2 = 1 / (1 + np.exp(-z2))
-        accuracies = compute_accuracy(a2, labels_train)
+        a2 = softmax(z2)
+        predictions = decode_class(a2)
+        accuracies.append(compute_accuracy(predictions, labels_train))
     return w1, b1, w2, b2, accuracies
 
 def compute_accuracy(y_predict, y_target):
-    true = 0
-    for i in range(np.size(y_predict, 0)):
-        if y_predict[i] == y_target[0]:
-            true += 1
-    return true/np.size(y_predict, 0)
+    return np.mean(y_target == y_predict)
 
 def test_mlp(w1: np.ndarray, b1: np.ndarray, w2:np.ndarray, b2:np.ndarray, data_test: np.ndarray, labels_test: np.ndarray):
     """
@@ -157,16 +166,13 @@ def test_mlp(w1: np.ndarray, b1: np.ndarray, w2:np.ndarray, b2:np.ndarray, data_
     :labels_train: output testing vector.
     :return: the accuracy of the test.
     """
-
-
-    w1, b1, w2, b2, _ = train_mlp(w1, b1, w2, b2, data_test, labels_test)
-
     a0 = data_test
     z1 = np.matmul(a0, w1) + b1
-    a1 = 1 / (1 + np.exp(-z1))
+    a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
-    y_predict = 1 / (1 + np.exp(-z2))
-    test_accuracy = compute_accuracy(y_predict, labels_test)
+    a2 = softmax(z2)
+    predictions = decode_class(a2)
+    test_accuracy = compute_accuracy(predictions, labels_test)
 
     return test_accuracy
 
@@ -183,7 +189,7 @@ def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:n
     """
     #Number of neurons on the first and the last layer.
     d_in = np.size(data_train, 1)
-    d_out = np.size(data_test, 0)
+    d_out = np.max(labels_train)+1
 
     # Random initialization of the network weights and biaises
     w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
@@ -192,7 +198,7 @@ def run_mlp_training(data_train:np.ndarray, labels_train:np.ndarray, data_test:n
     b2 = np.zeros((1, d_out)) # second layer biaises
 
     w1, b1, w2, b2, list_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch)
-    w1, b1, w2, b2, final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
+    final_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)
 
     return list_accuracies, final_accuracy
 
diff --git a/read_cifar.py b/read_cifar.py
index ed1c8ecaabb455d9853941622261d2bd982cd58d..5748800c4a8617450a6e6acf865cc7d9c4d30630 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -18,7 +18,7 @@ def read_cifar():
     #We are computing for the 5 first batchs
     data = []
     labels = []
-    for i in range(3):
+    for i in range(5):
         path_batch = directory + '/data_batch_' + str(i+1)
         with open(path_batch, 'rb') as fo:
             dict = pickle.load(fo, encoding='bytes')
diff --git a/results/knn.png b/results/knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..7513f117f85bdf831afd9ae1fc79fc6aaba12ff7
Binary files /dev/null and b/results/knn.png differ
diff --git a/results/mlp.png b/results/mlp.png
new file mode 100644
index 0000000000000000000000000000000000000000..44bc25a4f118e99c60854a41e5100943f8771b5b
Binary files /dev/null and b/results/mlp.png differ
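
A quick, standalone way to exercise the rewritten backward pass outside the CIFAR pipeline — a minimal sketch, assuming a random 16-sample batch, 64 hidden units, 10 classes and a learning rate of 0.1 (illustrative values; only the function name learn_once_cross_entropy comes from mlp.py above):

import numpy as np
from mlp import learn_once_cross_entropy

# Fake batch: 16 flattened 32x32x3 images; labels cover classes 0..9 so one_hot builds 10 columns.
rng = np.random.default_rng(0)
data = rng.random((16, 3072))
labels = np.arange(16) % 10

d_in, d_h, d_out = data.shape[1], 64, 10
w1 = 2 * rng.random((d_in, d_h)) - 1   # first layer weights
b1 = np.zeros((1, d_h))                # first layer biases
w2 = 2 * rng.random((d_h, d_out)) - 1  # second layer weights
b2 = np.zeros((1, d_out))              # second layer biases

# One gradient step: the loss should be a finite scalar and every parameter keeps its shape.
w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data, labels, 0.1)
print(loss, w1.shape, b1.shape, w2.shape, b2.shape)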