diff --git a/knn.py b/knn.py
index 1bf127dfe535a0000c5b171df1a0e23ea9035d4f..b17fa4c95884e15d032f8090df65325fe31404c3 100644
--- a/knn.py
+++ b/knn.py
@@ -10,64 +10,26 @@ import os
 # This method is not the most efficient one available, but it gives
 # a first idea
 def distance_matrix(matrix1, matrix2):
-    # Calculate the squared sum of matrix1
     sum_matrix1 = np.sum(matrix1**2, axis=1, keepdims=True)
-
-    # Calculate the squared sum of matrix2
     sum_matrix2 = np.sum(matrix2**2, axis=1, keepdims=True)
-
-    # Compute the dot product between matrix1 and matrix2
     dot_product = np.dot(matrix1, matrix2.T)
-
-    # Compute the Euclidean distance matrix
     dists = np.sqrt(sum_matrix1 - 2 * dot_product + sum_matrix2.T)
-
     return dists
 
-#Test
-# Create two example matrices
-matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
-matrix2 = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]])
-
-# Compute the Euclidean distance matrix
-dists = distance_matrix(matrix1, matrix2)
-
+### Test on two matrices
+##matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+##matrix2 = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18]])
+##dists = distance_matrix(matrix1, matrix2)
 ##print(dists)
 
 # The knn_predict function is fairly simple:
 # for each image, we sort its row of the distance matrix in ascending order
 # (so the "closest" images come first), then we look at the labels of the
 # k nearest images and keep the label that occurs most often
-##def knn_predict(dists, labels_train, k):
-##    # Initialize an empty array to store the predicted labels
-##    predicted_labels = []
-##    # Loop through each row in the distance matrix (each test example)
-##    for i in range(dists.shape[0]):
-##        # Get the distances for the current test example
-##        distances = dists[i]
-##        # Get the indices of the k nearest neighbors
-##        nearest_indices = np.argsort(distances)[:k]
-##
-##        # Get the labels of the k nearest neighbors
-##        nearest_labels = [labels_train[idx] for idx in nearest_indices]
-##
-##        # Use a voting mechanism to determine the predicted label
-##        predicted_label = max(set(nearest_labels), key=nearest_labels.count)
-##
-##        # Append the predicted label to the result array
-##        predicted_labels.append(predicted_label)
-##    return predicted_labels
-
 def knn_predict(dists, labels_train, k):
-    # Use np.argpartition to find the indices of the k nearest neighbors for all test examples
     nearest_indices = np.argpartition(dists, k, axis=1)[:, :k]
-
-    # Get the labels of the k nearest neighbors for all test examples
     nearest_labels = labels_train[nearest_indices]
-
-    # Use a voting mechanism to determine the predicted labels for all test examples
     predicted_labels = np.array([np.argmax(np.bincount(nearest_labels[i])) for i in range(nearest_labels.shape[0])])
-
     return predicted_labels
 
 # In this function we compute the classification rate,
@@ -75,50 +37,33 @@ def knn_predict(dists, labels_train, k):
 # of observations. To do that, we first train the algorithm with the
 # training set, then check it against the test set
 def evaluate_knn(data_train,labels_train,data_test,labels_test,k):
-    # Calculate the distance matrix between the training and test data
     dists = distance_matrix(data_test, data_train)
-
-    # Use the knn_predict function to get predicted labels for the test data
     predicted_labels = knn_predict(dists, labels_train, k)
-
-    # Initialize a variable to count the number of correct predictions
     correct_predictions = 0
-
-    # Loop through the predicted and true labels and count the correct predictions
     for predicted_label, true_label in zip(predicted_labels, labels_test):
         if predicted_label == true_label:
             correct_predictions += 1
-
-    # Calculate accuracy as the ratio of correct predictions to the total number of test instances
     accuracy = correct_predictions / len(labels_test) * 100
     return accuracy
 
 if __name__ == "__main__":
     data_folder = 'data/cifar-10-batches-py'
-    batch_filename = 'data_batch_1' # Adjust this to the specific batch file you want to read
-
+    batch_filename = 'data_batch_1'
     batch_path = os.path.join(data_folder, batch_filename)
-
     data, labels = read_cifar.read_cifar_batch(batch_path)
     data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
-    print(len(data_train),len(data_test))
-    # Initialize lists to store k values and corresponding accuracies
+    # List of k values to evaluate
     k_values = list(range(1, 21))
     accuracies = []
-    # Calculate accuracy for different values of k
+    # Accuracy for the different values of k
     for k in k_values:
         accuracy = evaluate_knn(data_train, labels_train, data_test, labels_test, k)
         accuracies.append(accuracy)
-    # Create a plot of accuracy vs. k values
     plt.figure(figsize=(10, 6))
     plt.plot(k_values, accuracies, marker='o', linestyle='-', color='b')
-    plt.title('Accuracy vs. k for k-Nearest Neighbors')
+    plt.title('Accuracy as a function of k, for k-Nearest Neighbors')
     plt.xlabel('k (Number of Neighbors)')
     plt.ylabel('Accuracy (%)')
     plt.grid(True)
-
-    # Save the plot as "knn.png" in the "results" directory
     plt.savefig('results/knn.png')
-
-    # Show the plot (optional)
     plt.show()
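Note: distance_matrix relies on the expansion ||x - y||^2 = ||x||^2 - 2 x.y + ||y||^2, evaluated for all pairs at once. A minimal standalone sketch of the same trick, checked against SciPy's cdist (assuming SciPy is installed; this snippet is illustrative only, not part of the patch):

    import numpy as np
    from scipy.spatial.distance import cdist

    rng = np.random.default_rng(0)
    a, b = rng.random((5, 3)), rng.random((4, 3))
    # same expansion as distance_matrix; clamp tiny float-point negatives before the sqrt
    sq = (a**2).sum(axis=1, keepdims=True) - 2 * a @ b.T + (b**2).sum(axis=1)
    d = np.sqrt(np.maximum(sq, 0))
    assert np.allclose(d, cdist(a, b))  # matches the pairwise Euclidean distances

The np.maximum clamp guards against NaN when two rows are nearly identical; the patched distance_matrix omits it, which is usually fine on CIFAR data.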
diff --git a/mlp.py b/mlp.py
index 22008e84e745c42b77a12ef0237a5d4de3b1bbd1..ec9c76fb535c7985a108c0e86b6b3471bb899ea5 100644
--- a/mlp.py
+++ b/mlp.py
@@ -18,18 +18,18 @@ b2 = np.zeros((1, d_out)) # second layer biases
 data = np.random.rand(N, d_in) # create random data
 targets = np.random.rand(N, d_out) # create random targets
 
-# Sigmoid function
+# Sigmoid function, used later to compute the matrices a1 and a2
 def sigmoid(z):
-    return 1 / (1 + np.exp(-np.clip(z, -30, 30))) #to avoid overflow
+    return 1 / (1 + np.exp(-np.clip(z, -30, 30))) # clip to avoid overflow
 
-# Forward pass
+# Forward pass building the matrices a0, z1, a1, z2, a2 and the predictions
 def forward_pass(data, w1, b1, w2, b2):
-    a0 = data # the data are the input of the first layer
-    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
-    a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
-    z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = sigmoid(z2) # output of the output layer (sigmoid activation function)
-    predictions = a2 # the predicted values are the outputs of the output layer
+    a0 = data
+    z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
+    a1 = sigmoid(z1) # output of the hidden layer
+    z2 = np.matmul(a1, w2) + b2 # input of the output layer
+    a2 = sigmoid(z2) # output of the output layer
+    predictions = a2 # the predictions are the outputs of the output layer
     return (a0,z1,a1,z2,a2,predictions)
 
 # Compute loss (MSE)
@@ -50,7 +50,7 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
     grad_w1 = np.matmul(data.T, grad_z1)
     grad_b1 = np.sum(grad_z1, axis=0, keepdims=True)
 
-    # Update weights and biases using gradient descent
+    # Update the weights and biases using gradient descent
     w1 -= learning_rate * grad_w1
     b1 -= learning_rate * grad_b1
     w2 -= learning_rate * grad_w2
@@ -58,36 +58,33 @@ def learn_once_mse(w1,b1,w2,b2,data,targets,learning_rate = 0.01):
 
     return w1, b1, w2, b2, loss
 
+# This function avoids overflow on large numbers (numerically stable softmax);
+# each row of the batch is normalized independently
+def softmax(x):
+    e = np.exp(x - np.max(x, axis=1, keepdims=True))
+    return e / e.sum(axis=1, keepdims=True)
 
-# Forward pass
+# New forward pass using the softmax function at the output layer
 def forward(data, w1, b1, w2, b2):
     a0 = data # the data are the input of the first layer
     z1 = np.matmul(a0, w1) + b1 # input of the hidden layer
     a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function)
     z2 = np.matmul(a1, w2) + b2 # input of the output layer
-    a2 = softmax_stable(z2) # output of the output layer (sigmoid activation function)
+    a2 = softmax(z2) # output of the output layer (softmax activation function)
     predictions = a2 # the predicted values are the outputs of the output layer
     return (a0,z1,a1,z2,a2,predictions)
 
+# Turns each class label into a one-hot vector whose length is the number of classes.
 def one_hot(labels):
     num_classes = np.max(labels) + 1
     one_hot_matrix = np.eye(num_classes)[labels]
     return one_hot_matrix
 
-def softmax_stable(x):
-    #We use this function to avoid computing to big numbers
-    return(np.exp(x - np.max(x)) / np.exp(x - np.max(x)).sum())
-
 def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
     a0,z1,a1,z2,a2,predictions = forward(data, w1, b1, w2, b2)
-
     N = len(labels_train)
-
     labels_train = one_hot(labels_train)
-
-    # Compute the gradient of the loss with respect to the predictions (a2)
     grad_z2 = a2 - labels_train
 
     # Backpropagation
@@ -98,7 +95,7 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
     grad_w1 = np.matmul(data.T, grad_z1)
     grad_b1 = np.sum(grad_z1, axis=0, keepdims=True)
 
-    # Update weights and biases using gradient descent
+    # Update the weights and biases using gradient descent
     w1 -= learning_rate * grad_w1
     b1 -= learning_rate * grad_b1
     w2 -= learning_rate * grad_w2
@@ -110,11 +107,11 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
 
     return w1, b1, w2, b2, loss
 
-# Prediction function which, for a given vector, returns the predicted class (i.e. the index of the largest element)
+# Prediction function which, for a given vector, returns the predicted class
 def predict_class(predictions):
     return np.argmax(predictions, axis=1)
 
-# Accuracy function which compares a list of predictions with the list of results and returns the proportion of true predictions
+# Accuracy function which compares a list of predictions with the list of true labels and returns the proportion of correct predictions
 def accuracy(y_true, y_pred):
     return np.mean(y_true == y_pred)
 
@@ -134,15 +131,15 @@ def test_mlp(w1,b1,w2,b2, data_test,labels_test):
     return test_accuracy
 
 def run_mlp_training(data_train,labels_train,data_test,labels_test,d_h,learning_rate,num_epoch):
-    N = data_train.shape[0] # number of input data
-    d_in = data_train.shape[1] # input dimension
-    d_out = np.max(labels_train)+1 # output dimension (number of neurons of the output layer)
-
-    # Random initialization of the network weights and biases
-    w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights
-    b1 = np.zeros((1, d_h)) # first layer biases
-    w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights
-    b2 = np.zeros((1, d_out)) # second layer biases
+    N = data_train.shape[0]
+    d_in = data_train.shape[1]
+    d_out = np.max(labels_train)+1
+
+    # Random initialization of the network
+    w1 = 2 * np.random.rand(d_in, d_h) - 1
+    b1 = np.zeros((1, d_h))
+    w2 = 2 * np.random.rand(d_h, d_out) - 1
+    b2 = np.zeros((1, d_out))
 
     w1,b1,w2,b2, train_accuracies = train_mlp(w1,b1,w2,b2, data_train, labels_train, learning_rate, num_epoch)
     test_accuracy = test_mlp(w1,b1,w2,b2, data_test,labels_test)
@@ -151,7 +148,7 @@
 if __name__ == "__main__":
     data_folder = 'data/cifar-10-batches-py'
-    batch_filename = 'data_batch_1' # Adjust this to the specific batch file you want to read
+    batch_filename = 'data_batch_1'
     batch_path = os.path.join(data_folder, batch_filename)
     data, labels = read_cifar.read_cifar_batch(batch_path)
     data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, 0.9)
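Note: the new softmax above normalizes each row independently (axis=1), so every sample in the batch gets its own probability distribution. With a single global sum, as in the removed softmax_stable, the rows would not sum to 1 and grad_z2 = a2 - labels_train would no longer be the cross-entropy gradient. A standalone sanity check of the row-wise behavior (illustrative only, not part of the patch):

    import numpy as np

    def softmax(x):
        e = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e / e.sum(axis=1, keepdims=True)

    z = np.array([[2.0, 1.0, 0.1],
                  [0.5, 0.5, 3.0]])
    assert np.allclose(softmax(z).sum(axis=1), 1.0)  # one distribution per row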
diff --git a/read_cifar.py b/read_cifar.py
index a4bb3b3f66fd15ad40ba62f7bd16d18dabd7b55b..c7a65cc792788bdd4cfcb60a9d1a0948f3ae088a 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -29,20 +29,15 @@ def split_dataset(data, labels, split):
     if split < 0 or split > 1:
         raise ValueError("The split parameter must be a float between 0 and 1.")
 
-    # Get the number of samples in the dataset
     num_samples = len(data)
-    # Calculate the number of samples for the training set
     num_train_samples = int(num_samples * split)
 
-    # Create a random permutation of indices for shuffling
     indices = np.random.permutation(num_samples)
 
-    # Split the indices into training and test sets
     train_indices = indices[:num_train_samples]
     test_indices = indices[num_train_samples:]
 
-    # Split the data and labels based on the shuffled indices
     data_train = data[train_indices]
     labels_train = labels[train_indices]
     data_test = data[test_indices]
@@ -53,22 +48,14 @@ def split_dataset(data, labels, split):
 
 if __name__ == "__main__":
     data_folder = 'data/cifar-10-batches-py'
-    batch_filename = 'data_batch_1' # Adjust this to the specific batch file you want to read
+    batch_filename = 'data_batch_1'
     batch_path = os.path.join(data_folder, batch_filename)
     data, labels = read_cifar_batch(batch_path)
 
-##    # Example: Printing the shape of data and labels
-##    print("Data shape:", data.shape)
-##    print("Labels shape:", labels.shape)
-
     # Example: Printing data and labels for all files from the folder
     data1, labels1 = read_cifar(data_folder)
     print("Data :", data1)
     print("Labels :", labels1)
-
-##    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, 0.8)
-##    # Example: Printing the shape of data test and train :
-##    print("Data train shape:", data_train.shape)
-##    print("Data test shape:", data_test.shape)
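Note: split_dataset draws a fresh random permutation on every call, so the train/test composition (and therefore the accuracy curves) varies between runs unless the NumPy seed is fixed. A toy illustration of the same permutation-split technique (the arrays here are hypothetical stand-ins, not CIFAR data):

    import numpy as np

    data = np.arange(10).reshape(10, 1)  # 10 fake samples
    labels = np.arange(10)
    indices = np.random.permutation(len(data))
    n_train = int(len(data) * 0.9)
    data_train, data_test = data[indices[:n_train]], data[indices[n_train:]]
    print(data_train.shape, data_test.shape)  # (9, 1) (1, 1)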
diff --git a/results/knn.png b/results/knn.png
index 587f48dfc222e49ea0d9ccb812fc84f8dbaefd0e..1784b0d40928b956062e5af4e66a4afe0b07c81b 100644
Binary files a/results/knn.png and b/results/knn.png differ
diff --git a/results/mlp.png b/results/mlp.png
index ef972fe86b943ddfe2aa46fcb6eb076549dd0378..8e67ea2cfade63a55118596f9b59893a64641b53 100644
Binary files a/results/mlp.png and b/results/mlp.png differ