diff --git a/README.md b/README.md
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a7526ba204b25dbab03927d4408a4620ea42e0df 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,38 @@
+Artificial Neural Network
+Chain rule for computing the gradient:
+
+
+1. C = 1/Nout * sum((A(2) - Y)^2)
+
+2. ∂C/∂A(2) = 2/Nout * (A(2) - Y)
+
+3. ∂C/∂Z(2) = ∂C/∂A(2) * ∂A(2)/∂Z(2)
+   ∂C/∂Z(2) = ∂C/∂A(2) * σ'(Z(2))
+   ∂C/∂Z(2) = ∂C/∂A(2) * σ(Z(2)) * (1 - σ(Z(2)))
+   ∂C/∂Z(2) = ∂C/∂A(2) * A(2) * (1 - A(2))
+
+4. ∂C/∂W(2) = ∂C/∂Z(2) * ∂Z(2)/∂W(2)
+   ∂C/∂W(2) = ∂C/∂Z(2) * A(1)
+
+5. ∂C/∂B(2) = ∂C/∂Z(2) * ∂Z(2)/∂B(2)
+   ∂C/∂B(2) = sum(∂C/∂Z(2), axis=0)
+
+6. ∂C/∂A(1) = ∂C/∂Z(2) * ∂Z(2)/∂A(1)
+   ∂C/∂A(1) = ∂C/∂Z(2) * W(2)
+
+7. ∂C/∂Z(1) = ∂C/∂A(1) * σ'(Z(1))
+   ∂C/∂Z(1) = ∂C/∂A(1) * A(1) * (1 - A(1))
+
+8. ∂C/∂W(1) = ∂C/∂Z(1) * A(0)
+
+9. ∂C/∂B(1) = sum(∂C/∂Z(1), axis=0)
+
+Looking at the knn plot, the maximum accuracy is reached for k = 1, which seems logical since each point
+is then assigned the label of its single nearest neighbour.
+The other accuracy peak is located around k = 7. These values should therefore be favoured to obtain the best possible accuracy.
+The execution time stays stable at around 28 s for the different values of k.
+
+Looking at the mlp plot, the accuracy oscillates around a value close to 0.1, with occasional peaks.
+We can nevertheless conclude that the learning is not very good, since the general trend is a stagnation of the accuracy.
+
+
diff --git a/knn.py b/knn.py
index a2ce05f978b04d98d5de9968eca0e5726aa3cb31..de3c8d62b5509fd2be3bcd6338465d1dd6b0dd3f 100644
--- a/knn.py
+++ b/knn.py
@@ -1,54 +1,44 @@
 import numpy as np
 from sklearn.metrics import accuracy_score
-import matplotlib.pyplot as plt

-def distance_matrix(matrix1, matrix2):
-    # Calculate the squared norms of each row in the input matrices
-    norms1 = np.sum(matrix1**2, axis=1, keepdims=True)
-    norms2 = np.sum(matrix2**2, axis=1, keepdims=True)
-
-    # Compute the dot product between the matrices
-    dot_product = np.dot(matrix1, matrix2.T)
+def distance_matrix(mat1, mat2):
+    norms1 = np.sum(mat1**2, axis=1, keepdims=True)
+    norms2 = np.sum(mat2**2, axis=1, keepdims=True)

-    # Calculate the L2 Euclidean distance using the hint formula
-    dists = np.sqrt(norms1 - 2 * dot_product + norms2.T)
+    dot_product = np.dot(mat1, mat2.T)
+    dists = np.sqrt(norms1 - 2 * dot_product + norms2.T)

     return dists


 def knn_predict(dists, labels_train, k):
-    # Number of test samples
     num_test_samples = dists.shape[0]

-    # Initialize an array to store the predicted labels
-    predicted_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)
+    pred_labels = np.zeros(num_test_samples, dtype=labels_train.dtype)

     for i in range(num_test_samples):
-        # Get the distances for the current test sample
         distances = dists[i]

-        # Find the indices of the k nearest neighbors
+        # Find the k indices with the smallest distances
         k_nearest_indices = np.argsort(distances)[:k]

-        # Get the labels of the k nearest neighbors
+        # Get the labels of these k neighbours
         k_nearest_labels = labels_train[k_nearest_indices]

-        # Use np.bincount to count the occurrences of each label
-        # and choose the label with the highest count
-        predicted_label = np.argmax(np.bincount(k_nearest_labels))
+        # Count the occurrences of each label and pick the one that appears most often
+        pred_label = np.argmax(np.bincount(k_nearest_labels))

-        # Assign the predicted label to the current test sample
-        predicted_labels[i] = predicted_label
+        pred_labels[i] = pred_label

-    return predicted_labels
+    return pred_labels


 def evaluate_knn(data_train, labels_train, data_test, labels_test, k):
-    # Use the previously defined knn_predict function to get predictions
     predicted_labels = knn_predict(distance_matrix(data_test, data_train), labels_train, k)

-    # Calculate the accuracy by comparing predicted labels to actual labels
+    # Compute the accuracy by comparing the predictions with the true labels
     accuracy = accuracy_score(labels_test, predicted_labels)

     return accuracy
diff --git a/main.py b/main.py
index 82b526f7562023d8dac0786efc12e9276dbb5097..e51199fc76855f177ff4b48412a6b98aaa420e6e 100644
--- a/main.py
+++ b/main.py
@@ -2,35 +2,50 @@ import read_cifar
 import knn
 import matplotlib.pyplot as plt
 import mlp
+import time

 split = 0.9
 d_h=64
 learning_rate=0.1
-num_epochs=2
+num_epochs=100

 batch_path = "data/cifar-10-python\cifar-10-batches-py"

 data, labels = read_cifar.read_cifar(batch_path)
 data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split)

-"""k_values = range(1, 21)
+k_values = range(1, 21)
 accuracies = []
+times = []

 for k in k_values:
+    start_time = time.time()
     accuracy = knn.evaluate_knn(data_train, labels_train, data_test, labels_test, k)
+    end_time = time.time()
+    execution_time = end_time - start_time
+    times.append(execution_time)
     accuracies.append(accuracy)
+    print(f"Accuracy for k={k}: {accuracy:.2f}, Time: {execution_time:.2f}s")

 plt.figure(figsize=(8, 6))
 plt.plot(k_values, accuracies, marker='o')
 plt.title('KNN Accuracy vs. k')
 plt.xlabel('k')
 plt.ylabel('Accuracy')
+plt.xticks(k_values)
 plt.grid(True)
-
-# On enregistre le graphique dans Results
 plt.savefig('results/knn.png')
+plt.show()

-plt.show()"""
+plt.figure(figsize=(8, 6))
+plt.plot(k_values, times, marker='o')
+plt.title('Execution time vs. k')
+plt.xlabel('k')
+plt.ylabel('Time (s)')
+plt.xticks(k_values)
+plt.grid(True)
+plt.savefig('results/time_knn.png')
+plt.show()

 train_accuracies,test_accuracy = mlp.run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs)
@@ -41,4 +56,5 @@ def plot_learning_accuracy(train_accuracies):
     plt.ylabel("Training Accuracy")
     plt.title("MLP Training Accuracy")
     plt.savefig("results/mlp.png")
+
 plot_learning_accuracy(train_accuracies)
\ No newline at end of file
diff --git a/mlp.py b/mlp.py
index f451cbd25b638349bce68be630949e46c6670e1c..35639c08d6cd126cf9df72be696ec0985cbbbd71 100644
--- a/mlp.py
+++ b/mlp.py
@@ -1,5 +1,4 @@
 import numpy as np
-import matplotlib.pyplot as plt

 def sigmoid(x):
     return 1 / (1 + np.exp(-x))
@@ -8,7 +7,6 @@ def sigmoid_derivative(x):
     return x * (1 - x)

 def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
-    # Forward pass
     a0 = data
     z1 = np.matmul(a0, w1) + b1
     a1 = sigmoid(z1)
@@ -16,7 +14,7 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     a2 = sigmoid(z2)
     predictions = a2

-    # Compute loss (MSE)
+    # MSE loss
     loss = np.mean(np.square(predictions - targets))

     # Backpropagation
@@ -25,7 +23,7 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     delta_a1 = np.matmul(delta_z2, w2.T)
     delta_z1 = delta_a1 * sigmoid_derivative(a1)

-    # Update weights and biases
+    # Update the weights and biases
     w2 -= learning_rate * np.matmul(a1.T, delta_z2)
     b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
     w1 -= learning_rate * np.matmul(a0.T, delta_z1)
@@ -33,32 +31,40 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     return w1, b1, w2, b2, loss

-def one_hot(labels, num_classes):
-    one_hot_matrix = np.zeros((len(labels), num_classes))
-    one_hot_matrix[np.arange(len(labels)), labels] = 1
+def softmax(z):  # the max subtraction avoids numerical instabilities in the loss
+    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
+    return exp_z / np.sum(exp_z, axis=1, keepdims=True)
+
+
+def one_hot(labels):
+    n_classes = len(np.unique(labels))
+    one_hot_matrix = np.zeros((labels.shape[0], n_classes))
+    for i, label in enumerate(labels):
+        one_hot_matrix[i, label] = 1
     return one_hot_matrix


 def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
-    # Forward pass
     a0 = data
     z1 = np.matmul(a0, w1) + b1
     a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
-    a2 = sigmoid(z2)
+    a2 = softmax(z2)
     predictions = a2

-    # Compute loss (cross-entropy)
+    # Cross-entropy loss
     m = len(labels_train)
-    one_hot_labels = one_hot(labels_train, num_classes=w2.shape[1])
-    loss = -1/m * np.sum(one_hot_labels * np.log(predictions) + (1 - one_hot_labels) * np.log(1 - predictions))
+    one_hot_labels = one_hot(labels_train)
+    epsilon = 1e-9
+    predictions = np.clip(predictions, epsilon, 1 - epsilon)  # avoids log(0) instabilities in the loss
+    loss = -np.mean(one_hot_labels * np.log(predictions))

     # Backpropagation
     delta_z2 = a2 - one_hot_labels
     delta_a1 = np.matmul(delta_z2, w2.T)
     delta_z1 = delta_a1 * sigmoid_derivative(a1)

-    # Update weights and biases
+    # Update the weights and biases
     w2 -= learning_rate * np.matmul(a1.T, delta_z2)
     b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
     w1 -= learning_rate * np.matmul(a0.T, delta_z1)
@@ -77,40 +83,17 @@ def train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epoch
     train_accuracies = []

     for epoch in range(num_epochs):
-        for i in range(len(data_train)):
-            data = data_train[i:i+1]
-            labels = labels_train[i:i+1]
-
-            # Forward pass
-            a0 = data
-            z1 = np.matmul(a0, w1) + b1
-            a1 = sigmoid(z1)
-            z2 = np.matmul(a1, w2) + b2
-            a2 = sigmoid(z2)
-            predictions = a2
-
-            # Compute loss (cross-entropy)
-            one_hot_labels = one_hot(labels, num_classes=w2.shape[1])
-            loss = -np.sum(one_hot_labels * np.log(predictions) + (1 - one_hot_labels) * np.log(1 - predictions))
-
-            # Backpropagation
-            delta_z2 = a2 - one_hot_labels
-            delta_a1 = np.matmul(delta_z2, w2.T)
-            delta_z1 = delta_a1 * sigmoid_derivative(a1)
-
-            # Update weights and biases
-            w2 -= learning_rate * np.matmul(a1.T, delta_z2)
-            b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
-            w1 -= learning_rate * np.matmul(a0.T, delta_z1)
-            b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)
-
-        # Calculate training accuracy for this epoch
+        w1, b1, w2, b2, loss = learn_once_cross_entropy(w1, b1, w2, b2, data_train, labels_train, learning_rate)
+
+        # Compute the training accuracy for this epoch
         a0 = data_train
         z1 = np.matmul(a0, w1) + b1
         a1 = sigmoid(z1)
         z2 = np.matmul(a1, w2) + b2
-        a2 = sigmoid(z2)
-        train_accuracy = compute_accuracy(a2, labels_train)
+        a2 = softmax(z2)
+        predictions = a2
+
+        train_accuracy = compute_accuracy(predictions, labels_train)
         train_accuracies.append(train_accuracy)

         print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.4f}")
@@ -122,11 +105,10 @@ def test_mlp(w1, b1, w2, b2, data_test, labels_test):
     z1 = np.matmul(a0, w1) + b1
     a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
-    a2 = sigmoid(z2)
+    a2 = softmax(z2)
+    predictions = a2

-    predicted_labels = np.argmax(a2, axis=1)
-    correct = np.sum(predicted_labels == labels_test)
-    test_accuracy = correct / len(labels_test)
+    test_accuracy = compute_accuracy(predictions, labels_test)

     return test_accuracy

@@ -140,41 +122,7 @@ def run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, lear
     w2 = 2 * np.random.rand(d_h, d_out) - 1
     b2 = np.zeros((1, d_out))

-    train_accuracies = []
-
-    for epoch in range(num_epochs):
-        for i in range(len(data_train)):
-            data = data_train[i:i+1]
-            labels = labels_train[i:i+1]
-
-            a0 = data
-            z1 = np.matmul(a0, w1) + b1
-            a1 = sigmoid(z1)
-            z2 = np.matmul(a1, w2) + b2
-            a2 = sigmoid(z2)
-
-            one_hot_labels = one_hot(labels, num_classes=d_out)
-            loss = -np.sum(one_hot_labels * np.log(a2) + (1 - one_hot_labels) * np.log(1 - a2))
-
-            delta_z2 = a2 - one_hot_labels
-            delta_a1 = np.matmul(delta_z2, w2.T)
-            delta_z1 = delta_a1 * a1 * (1 - a1)
-
-            w2 -= learning_rate * np.matmul(a1.T, delta_z2)
-            b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
-            w1 -= learning_rate * np.matmul(a0.T, delta_z1)
-            b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)
-
-        a0 = data_train
-        z1 = np.matmul(a0, w1) + b1
-        a1 = sigmoid(z1)
-        z2 = np.matmul(a1, w2) + b2
-        a2 = sigmoid(z2)
-
-        train_accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
-        train_accuracies.append(train_accuracy)
-        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss:.4f}, Training Accuracy: {train_accuracy:.4f}")
-
+    w1, b1, w2, b2, train_accuracies = train_mlp(w1, b1, w2, b2, data_train, labels_train, learning_rate, num_epochs)
     test_accuracy = test_mlp(w1, b1, w2, b2, data_test, labels_test)

     return train_accuracies, test_accuracy
diff --git a/read_cifar.py b/read_cifar.py
index b1c8b2dc8cb15f32da7f6649c6e56fac602037f9..8934ac93f14bd07981049508b44352b265004ce6 100644
--- a/read_cifar.py
+++ b/read_cifar.py
@@ -6,10 +6,9 @@ def read_cifar_batch(batch_path):
     with open(batch_path, 'rb') as file:
         batch_data = pickle.load(file, encoding='bytes')

-    data = batch_data[b'data']  # CIFAR-10 data
-    labels = batch_data[b'labels']  # Class labels
+    data = batch_data[b'data']
+    labels = batch_data[b'labels']

-    # Convertis data et label dans les types souhaités
     data = np.array(data, dtype=np.float32)
     labels = np.array(labels, dtype=np.int64)

@@ -36,12 +35,10 @@ def read_cifar(directory_path):


 def split_dataset(data, labels, split):
-    # Get the number of samples in the dataset
     num_samples = len(data)

-    # Calculate the number of samples for training and testing
+    # Compute the number of training samples
     num_train_samples = int(num_samples * split)
-    num_test_samples = num_samples - num_train_samples

     # Create a random permutation of the sample indices
     shuffle_indices = np.random.permutation(num_samples)
@@ -54,14 +51,3 @@ def split_dataset(data, labels, split):

     return data_train, labels_train, data_test, labels_test

-
-
-if __name__ == '__main__':
-    batch_path = "data/cifar-10-python\cifar-10-batches-py"
-    data, labels = read_cifar(batch_path)
-    print("Data shape:", data.shape)
-    print("Labels shape:", labels.shape)
-    split=0.9
-    data_train, labels_train, data_test, labels_test = split_dataset(data, labels, split)
-
-
diff --git a/results/knn.png b/results/knn.png
index 6b8526b22fe89b583153d7248d6dc77b64a43b94..1a7f1b7296116b136e90bf6b081a74762d0456fe 100644
Binary files a/results/knn.png and b/results/knn.png differ
diff --git a/results/mlp.png b/results/mlp.png
index d350948b28266707c2fcd40988d16e15c21bc93d..42f42b3ab6eba1cdb98a3ffa79db7d8fc541ff98 100644
Binary files a/results/mlp.png and b/results/mlp.png differ
diff --git a/results/time_knn.png b/results/time_knn.png
new file mode 100644
index 0000000000000000000000000000000000000000..eaf87653e0a74fe878b1bd923f4f2d4f25c75a28
Binary files /dev/null and b/results/time_knn.png differ
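As a quick worked illustration of the chain-rule steps listed in the README hunk above, here is a minimal NumPy sketch of a single MSE gradient-descent step for the two-layer network. It is only a sketch, not code from the repository: the toy dimensions and random data are assumptions made for the example, the variable names merely mirror `mlp.py`, and the repository's actual training loop uses the cross-entropy variant (`learn_once_cross_entropy`) rather than this MSE version.

```python
# Illustrative sketch (not part of the repository): one MSE backpropagation
# step for the two-layer sigmoid network, following the README's steps 1-9.
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

rng = np.random.default_rng(0)
n, d_in, d_h, d_out = 4, 3, 5, 2            # assumed toy sizes
a0 = rng.random((n, d_in))                  # A(0): input batch
y = rng.random((n, d_out))                  # Y: targets
w1 = 2 * rng.random((d_in, d_h)) - 1        # same init scheme as mlp.py
b1 = np.zeros((1, d_h))
w2 = 2 * rng.random((d_h, d_out)) - 1
b2 = np.zeros((1, d_out))
learning_rate = 0.1

# Forward pass
z1 = a0 @ w1 + b1
a1 = sigmoid(z1)                            # A(1)
z2 = a1 @ w2 + b2
a2 = sigmoid(z2)                            # A(2)
cost = np.mean((a2 - y) ** 2)               # 1. C, with Nout = a2.size

# Backward pass (steps 2-9)
d_a2 = 2 / a2.size * (a2 - y)               # 2. dC/dA(2)
d_z2 = d_a2 * a2 * (1 - a2)                 # 3. dC/dZ(2)
d_w2 = a1.T @ d_z2                          # 4. dC/dW(2)
d_b2 = np.sum(d_z2, axis=0, keepdims=True)  # 5. dC/dB(2)
d_a1 = d_z2 @ w2.T                          # 6. dC/dA(1)
d_z1 = d_a1 * a1 * (1 - a1)                 # 7. dC/dZ(1)
d_w1 = a0.T @ d_z1                          # 8. dC/dW(1)
d_b1 = np.sum(d_z1, axis=0, keepdims=True)  # 9. dC/dB(1)

# Gradient-descent update, as in learn_once_mse
w1 -= learning_rate * d_w1
b1 -= learning_rate * d_b1
w2 -= learning_rate * d_w2
b2 -= learning_rate * d_b2

a2_new = sigmoid(sigmoid(a0 @ w1 + b1) @ w2 + b2)
print(f"cost before: {cost:.4f}, after one step: {np.mean((a2_new - y) ** 2):.4f}")
```

Running the sketch prints the cost before and after the update; the decrease is a cheap sanity check that the gradients above match the forward pass.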