diff --git a/__pycache__/knn.cpython-39.pyc b/__pycache__/knn.cpython-39.pyc
index eb7b11d1039c12379dba250936aed3b888321cab..d6e35752fafbc6e6852292d0b3aa65e2ed09cd2f 100644
Binary files a/__pycache__/knn.cpython-39.pyc and b/__pycache__/knn.cpython-39.pyc differ
diff --git a/__pycache__/mlp.cpython-39.pyc b/__pycache__/mlp.cpython-39.pyc
index 846ddab2e99713be53ae094d802f8f310c3e86b9..85525b0473b5f8b7c689333ce016d526d47c9911 100644
Binary files a/__pycache__/mlp.cpython-39.pyc and b/__pycache__/mlp.cpython-39.pyc differ
diff --git a/__pycache__/read_cifar.cpython-39.pyc b/__pycache__/read_cifar.cpython-39.pyc
index 3738cdce594ff5472b3bbb3863d23cb76f337b83..88267c36597ce7b4289a7a172f227d0b6e03ddac 100644
Binary files a/__pycache__/read_cifar.cpython-39.pyc and b/__pycache__/read_cifar.cpython-39.pyc differ
diff --git a/main.py b/main.py
index e51199fc76855f177ff4b48412a6b98aaa420e6e..8eeff9748d0fd516160a59126f51497c7a9fe877 100644
--- a/main.py
+++ b/main.py
@@ -14,7 +14,7 @@
 batch_path = "data/cifar-10-python\cifar-10-batches-py"
 data, labels = read_cifar.read_cifar(batch_path)
 data_train, labels_train, data_test, labels_test = read_cifar.split_dataset(data, labels, split)
 
-k_values = range(1, 21)
+"""k_values = range(1, 21)
 accuracies = []
 times = []
@@ -45,7 +45,7 @@
 plt.ylabel('time')
 plt.xticks(k_values)
 plt.grid(True)
 plt.savefig('results/time_knn.png')
-plt.show()
+plt.show()"""
 
 train_accuracies,test_accuracy = mlp.run_mlp_training(data_train, labels_train, data_test, labels_test, d_h, learning_rate, num_epochs)
@@ -57,4 +57,5 @@ def plot_learning_accuracy(train_accuracies):
     plt.title("MLP Training Accuracy")
     plt.savefig("results/mlp.png")
 
-plot_learning_accuracy(train_accuracies)
\ No newline at end of file
+plot_learning_accuracy(train_accuracies)
+
diff --git a/mlp.py b/mlp.py
index 35639c08d6cd126cf9df72be696ec0985cbbbd71..ddc6a1227bc7e2639a6975c6f900b9b86b6fea8c 100644
--- a/mlp.py
+++ b/mlp.py
@@ -11,28 +11,28 @@ def learn_once_mse(w1, b1, w2, b2, data, targets, learning_rate):
     z1 = np.matmul(a0, w1) + b1
     a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
-    a2 = sigmoid(z2)
+    a2 = softmax(z2)
     predictions = a2
 
     # MSE
     loss = np.mean(np.square(predictions - targets))
 
     # Backpropagation
-    delta_a2 = 2 * (predictions - targets) / data.shape[0]
+    delta_a2 = 2 * (predictions.T - targets) / data.shape[0]
     delta_z2 = delta_a2 * sigmoid_derivative(a2)
     delta_a1 = np.matmul(delta_z2, w2.T)
     delta_z1 = delta_a1 * sigmoid_derivative(a1)
 
     # Update weights and biases
-    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
-    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
-    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
-    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)
+    w2 -= learning_rate * np.matmul(a1.T, delta_z2) / data.shape[0]
+    b2 -= learning_rate * np.mean(delta_z2, axis=0)
+    w1 -= learning_rate * np.dot(a0.T, delta_z1) / data.shape[0]
+    b1 -= learning_rate * np.mean(delta_z1, axis=0)
 
     return w1, b1, w2, b2, loss
 
 
 def softmax(z):
     # avoids loss instabilities
-    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
+    exp_z = np.exp(z)
     return exp_z / np.sum(exp_z, axis=1, keepdims=True)
@@ -55,20 +55,18 @@ def learn_once_cross_entropy(w1, b1, w2, b2, data, labels_train, learning_rate):
 
     # Compute cross entropy
     m = len(labels_train)
     one_hot_labels = one_hot(labels_train)
-    epsilon = 1e-9
-    predictions = np.clip(predictions, epsilon, 1 - epsilon)  # avoids loss instabilities
     loss = -np.mean(one_hot_labels * np.log(predictions))
 
     # Backpropagation
     delta_z2 = a2 - one_hot_labels
-    delta_a1 = np.matmul(delta_z2, w2.T)
+    delta_a1 = np.dot(delta_z2, w2.T)
     delta_z1 = delta_a1 * sigmoid_derivative(a1)
 
     # Update weights and biases
-    w2 -= learning_rate * np.matmul(a1.T, delta_z2)
-    b2 -= learning_rate * np.sum(delta_z2, axis=0, keepdims=True)
-    w1 -= learning_rate * np.matmul(a0.T, delta_z1)
-    b1 -= learning_rate * np.sum(delta_z1, axis=0, keepdims=True)
+    w2 -= learning_rate * np.matmul(a1.T, delta_z2) / data.shape[0]
+    b2 -= learning_rate * np.mean(delta_z2, axis=0)
+    w1 -= learning_rate * np.dot(a0.T, delta_z1) / data.shape[0]
+    b1 -= learning_rate * np.mean(delta_z1, axis=0)
 
     return w1, b1, w2, b2, loss
diff --git a/results/mlp.png b/results/mlp.png
index 42f42b3ab6eba1cdb98a3ffa79db7d8fc541ff98..7354a5e71dae2f9c1338277d27c6b9cc9debc34e 100644
Binary files a/results/mlp.png and b/results/mlp.png differ
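
Review note: this patch removes both numerical-stability guards in mlp.py, the row-wise max subtraction inside softmax and the np.clip before np.log in learn_once_cross_entropy. With large logits, np.exp can overflow to inf; with a predicted probability of exactly 0, np.log returns -inf, and the loss becomes nan. Below is a minimal sketch of the guarded versions, assuming (N, C) NumPy arrays; the names stable_softmax and cross_entropy are illustrative, not taken from the repo.

import numpy as np

def stable_softmax(z):
    # Softmax is shift-invariant, so subtracting the row-wise max
    # leaves the output unchanged while preventing np.exp overflow.
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy(predictions, one_hot_labels, epsilon=1e-9):
    # Clipping keeps np.log away from log(0) = -inf.
    predictions = np.clip(predictions, epsilon, 1 - epsilon)
    return -np.mean(one_hot_labels * np.log(predictions))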