diff --git a/main.ipynb b/main.ipynb
index 72d5d962f2be70c0a1acfd68913a545a2b823724..1a9357a783ae569558942a92e323bfeb2a078f29 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 165,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 166,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 167,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 168,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -70,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 169,
    "metadata": {},
    "outputs": [
     {
@@ -100,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 170,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -121,7 +121,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 171,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -410,7 +410,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 187,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -437,7 +437,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 188,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -458,14 +458,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 189,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loss: 0.07218991126027921\n"
+      "Loss: 0.09592802009114174\n"
      ]
     }
    ],
@@ -492,7 +492,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 190,
    "metadata": {},
    "outputs": [
     {
@@ -503,7 +503,7 @@
        "       [1., 0., 0.]])"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 190,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -521,7 +521,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 191,
    "metadata": {},
    "outputs": [
     {
@@ -552,14 +552,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 192,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loss: 0.7075029802848043\n"
+      "Loss: 0.7102590810256381\n"
      ]
     }
    ],
@@ -570,7 +570,7 @@
     "    w2,\n",
     "    b2,\n",
     "    random_data,\n",
-    "    random_targets,\n",
+    "    one_hot(random_targets),\n",
     "    0.1,\n",
     ")\n",
     "\n",
@@ -586,7 +586,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 193,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -597,139 +597,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 194,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize data\n",
+    "norm_train_data_09 = train_data_09 / 255\n",
+    "norm_test_data_09 = test_data_09 / 255"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 195,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:221: RuntimeWarning: overflow encountered in exp\n",
-      "  z2 = np.matmul(a1, w2) + b2\n",
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:118: RuntimeWarning: overflow encountered in exp\n",
-      "  z2 = np.matmul(a1, w2) + b2\n",
-      "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:16: RuntimeWarning: invalid value encountered in divide\n",
-      "  \n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 0: 0.10592592592592592\n",
-      "Epoch 1: 0.09855555555555555\n",
-      "Epoch 2: 0.10437037037037038\n",
-      "Epoch 3: 0.109\n",
-      "Epoch 4: 0.10901851851851851\n",
-      "Epoch 5: 0.10907407407407407\n",
-      "Epoch 6: 0.10044444444444445\n",
-      "Epoch 7: 0.0995925925925926\n",
-      "Epoch 8: 0.09985185185185186\n",
-      "Epoch 9: 0.09985185185185186\n",
-      "Epoch 10: 0.09985185185185186\n",
-      "Epoch 11: 0.09985185185185186\n",
-      "Epoch 12: 0.09985185185185186\n",
-      "Epoch 13: 0.09985185185185186\n",
-      "Epoch 14: 0.09985185185185186\n",
-      "Epoch 15: 0.09985185185185186\n",
-      "Epoch 16: 0.09985185185185186\n",
-      "Epoch 17: 0.09985185185185186\n",
-      "Epoch 18: 0.09985185185185186\n",
-      "Epoch 19: 0.09985185185185186\n",
-      "Epoch 20: 0.09985185185185186\n",
-      "Epoch 21: 0.09985185185185186\n",
-      "Epoch 22: 0.09985185185185186\n",
-      "Epoch 23: 0.09985185185185186\n",
-      "Epoch 24: 0.09985185185185186\n",
-      "Epoch 25: 0.09985185185185186\n",
-      "Epoch 26: 0.09985185185185186\n",
-      "Epoch 27: 0.09985185185185186\n",
-      "Epoch 28: 0.09985185185185186\n",
-      "Epoch 29: 0.09985185185185186\n",
-      "Epoch 30: 0.09985185185185186\n",
-      "Epoch 31: 0.09985185185185186\n",
-      "Epoch 32: 0.09985185185185186\n",
-      "Epoch 33: 0.09985185185185186\n",
-      "Epoch 34: 0.09985185185185186\n",
-      "Epoch 35: 0.09985185185185186\n",
-      "Epoch 36: 0.09985185185185186\n",
-      "Epoch 37: 0.09985185185185186\n",
-      "Epoch 38: 0.09985185185185186\n",
-      "Epoch 39: 0.09985185185185186\n",
-      "Epoch 40: 0.09985185185185186\n",
-      "Epoch 41: 0.09985185185185186\n",
-      "Epoch 42: 0.09985185185185186\n",
-      "Epoch 43: 0.09985185185185186\n",
-      "Epoch 44: 0.09985185185185186\n",
-      "Epoch 45: 0.09985185185185186\n",
-      "Epoch 46: 0.09985185185185186\n",
-      "Epoch 47: 0.09985185185185186\n",
-      "Epoch 48: 0.09985185185185186\n",
-      "Epoch 49: 0.09985185185185186\n",
-      "Epoch 50: 0.09985185185185186\n",
-      "Epoch 51: 0.09985185185185186\n",
-      "Epoch 52: 0.09985185185185186\n",
-      "Epoch 53: 0.09985185185185186\n",
-      "Epoch 54: 0.09985185185185186\n",
-      "Epoch 55: 0.09985185185185186\n",
-      "Epoch 56: 0.09985185185185186\n",
-      "Epoch 57: 0.09985185185185186\n",
-      "Epoch 58: 0.09985185185185186\n",
-      "Epoch 59: 0.09985185185185186\n",
-      "Epoch 60: 0.09985185185185186\n",
-      "Epoch 61: 0.09985185185185186\n",
-      "Epoch 62: 0.09985185185185186\n",
-      "Epoch 63: 0.09985185185185186\n",
-      "Epoch 64: 0.09985185185185186\n",
-      "Epoch 65: 0.09985185185185186\n",
-      "Epoch 66: 0.09985185185185186\n",
-      "Epoch 67: 0.09985185185185186\n",
-      "Epoch 68: 0.09985185185185186\n",
-      "Epoch 69: 0.09985185185185186\n",
-      "Epoch 70: 0.09985185185185186\n",
-      "Epoch 71: 0.09985185185185186\n",
-      "Epoch 72: 0.09985185185185186\n",
-      "Epoch 73: 0.09985185185185186\n",
-      "Epoch 74: 0.09985185185185186\n",
-      "Epoch 75: 0.09985185185185186\n",
-      "Epoch 76: 0.09985185185185186\n",
-      "Epoch 77: 0.09985185185185186\n",
-      "Epoch 78: 0.09985185185185186\n",
-      "Epoch 79: 0.09985185185185186\n",
-      "Epoch 80: 0.09985185185185186\n",
-      "Epoch 81: 0.09985185185185186\n",
-      "Epoch 82: 0.09985185185185186\n",
-      "Epoch 83: 0.09985185185185186\n",
-      "Epoch 84: 0.09985185185185186\n",
-      "Epoch 85: 0.09985185185185186\n",
-      "Epoch 86: 0.09985185185185186\n",
-      "Epoch 87: 0.09985185185185186\n",
-      "Epoch 88: 0.09985185185185186\n",
-      "Epoch 89: 0.09985185185185186\n",
-      "Epoch 90: 0.09985185185185186\n",
-      "Epoch 91: 0.09985185185185186\n",
-      "Epoch 92: 0.09985185185185186\n",
-      "Epoch 93: 0.09985185185185186\n",
-      "Epoch 94: 0.09985185185185186\n",
-      "Epoch 95: 0.09985185185185186\n",
-      "Epoch 96: 0.09985185185185186\n",
-      "Epoch 97: 0.09985185185185186\n",
"Epoch 98: 0.09985185185185186\n", - "Epoch 99: 0.09985185185185186\n", - "Epoch 100: 0.09985185185185186\n", - "Test accuracy: 0.10133333333333333\n" + "Epoch: 1 / 10\n", + "Epoch: 2 / 10\n", + "Epoch: 3 / 10\n", + "Epoch: 4 / 10\n", + "Epoch: 5 / 10\n", + "Epoch: 6 / 10\n", + "Epoch: 7 / 10\n", + "Epoch: 8 / 10\n", + "Epoch: 9 / 10\n", + "Epoch: 10 / 10\n", + "Epoch 0: 0.0857037037037037\n", + "Epoch 1: 0.08585185185185185\n", + "Epoch 2: 0.0865\n", + "Epoch 3: 0.08796296296296297\n", + "Epoch 4: 0.09007407407407407\n", + "Epoch 5: 0.0915\n", + "Epoch 6: 0.09418518518518519\n", + "Epoch 7: 0.09607407407407408\n", + "Epoch 8: 0.09666666666666666\n", + "Epoch 9: 0.0979074074074074\n", + "Epoch 10: 0.09770370370370371\n", + "Test accuracy: 0.10083333333333333\n" ] } ], "source": [ "training_accuracy_values, test_accuracy = run_mlp_training(\n", - " train_data_09,\n", + " norm_train_data_09,\n", " train_labels_09,\n", - " test_data_09,\n", + " norm_test_data_09,\n", " test_labels_09,\n", " d_h,\n", " learning_rate,\n", - " num_epoch\n", + " 100\n", ")\n", "\n", "for (i, training_accuracy) in enumerate(training_accuracy_values):\n", diff --git a/mlp.py b/mlp.py index 944df7fc4918ce010493b179bb1911cea67dd881..905d829046240ba2066e487db903bf3c30e2a3e8 100644 --- a/mlp.py +++ b/mlp.py @@ -13,7 +13,19 @@ def softmax(x: np.ndarray) -> np.ndarray: np.ndarray: The softmax of the given vector. """ e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum(axis=0) + return e_x / (e_x.sum(axis=0) + EPSILON) + + +def sigmoid(x: np.ndarray) -> np.ndarray: + """Compute the sigmoid of the given vector. + + Args: + z (np.ndarray): The vector to compute the sigmoid of. + + Returns: + np.ndarray: The sigmoid of the given vector. + """ + return 1 / (1 + np.exp(-x)) def learn_once_mse( @@ -43,13 +55,9 @@ def learn_once_mse( # Forward pass a0 = data # the data are the input of the first layer z1 = np.matmul(a0, w1) + b1 # input of the hidden layer - a1 = 1 / ( - 1 + np.exp(-z1) - ) # output of the hidden layer (sigmoid activation function) + a1 = sigmoid(z1) # output of the hidden layer (sigmoid activation function) z2 = np.matmul(a1, w2) + b2 # input of the output layer - a2 = 1 / ( - 1 + np.exp(-z2) - ) # output of the output layer (sigmoid activation function) + a2 = sigmoid(z2) # output of the output layer (sigmoid activation function) predictions = a2 # the predicted values are the outputs of the output layer # Compute loss (MSE) @@ -58,7 +66,7 @@ def learn_once_mse( # Backward pass # Compute gradients - dC_da2 = 2 * (predictions - targets) / predictions.shape[0] + dC_da2 = 2 * (predictions - targets) / data.shape[0] dC_dz2 = dC_da2 * a2 * (1 - a2) dC_dw2 = np.matmul(a1.T, dC_dz2) dC_db2 = np.sum(dC_dz2, axis=0) @@ -115,26 +123,24 @@ def learn_once_cross_entropy( # Forward pass a0 = data z1 = np.matmul(a0, w1) + b1 - a1 = 1 / (1 + np.exp(-z1)) + a1 = sigmoid(z1) z2 = np.matmul(a1, w2) + b2 a2 = softmax(z2) predictions = a2 - one_hot_targets = one_hot(labels_train) - # Compute loss (Cross Entropy) # https://arize.com/blog-course/binary-cross-entropy-log-loss/ loss = -np.mean( - one_hot_targets * np.log(predictions + EPSILON) - + (1 - one_hot_targets) * np.log(1 - predictions + EPSILON) + labels_train * np.log(predictions + EPSILON) + + (1 - labels_train) * np.log(1 - predictions + EPSILON) ) # Backward pass # Compute gradients - dC_dz2 = a2 - one_hot_targets + dC_dz2 = (a2 - labels_train) / data.shape[0] dC_dw2 = np.matmul(a1.T, dC_dz2) dC_db2 = np.sum(dC_dz2, axis=0) dC_da1 = np.matmul(dC_dz2, w2.T) @@ -181,10 
     accuracy = test_mlp(w1, b1, w2, b2, data_train, labels_train)
     accuracies = [accuracy]
 
-    for _ in range(num_epoch):
+    one_hot_labels = one_hot(labels_train)
+
+    for epoch in range(num_epoch):
         # Train once
+        print("Epoch:", epoch + 1, "/", num_epoch)
+
         w1, b1, w2, b2, _ = learn_once_cross_entropy(
-            w1, b1, w2, b2, data_train, labels_train, learning_rate
+            w1, b1, w2, b2, data_train, one_hot_labels, learning_rate
         )
 
         # Compute current model training accuracy
@@ -218,7 +228,7 @@ def test_mlp(
     # Forward pass
     a0 = data_test
     z1 = np.matmul(a0, w1) + b1
-    a1 = 1 / (1 + np.exp(-z1))
+    a1 = sigmoid(z1)
     z2 = np.matmul(a1, w2) + b2
     a2 = softmax(z2)
     predictions = a2
diff --git a/results/mlp.png b/results/mlp.png
index 7adf3430932d16a46e17db300e3a1335a7211aac..de1a6be9de2e259aaaea5d0553b55697bf7bb139 100644
Binary files a/results/mlp.png and b/results/mlp.png differ
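
Note on the one-hot convention: learn_once_cross_entropy now expects already-encoded targets, train_mlp calls one_hot(labels_train) once before the loop, and dC_dz2 is divided by the batch size so the update magnitude no longer grows with the number of examples. The snippet below is a minimal sketch of that convention in isolation; the one_hot helper, the EPSILON value, and all shapes are stand-ins, not the repo's exact code.

import numpy as np

EPSILON = 1e-15  # same role as EPSILON in mlp.py: keeps log() away from zero


def one_hot(labels: np.ndarray, n_classes: int) -> np.ndarray:
    # Stand-in for the repo's one_hot helper: row i is all zeros
    # except for a 1 in column labels[i].
    encoded = np.zeros((labels.shape[0], n_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded


labels = np.array([2, 0, 1])
targets = one_hot(labels, n_classes=3)  # shape (3, 3), like labels_train after encoding

# Pretend softmax outputs for a batch of 3 examples.
predictions = np.array([
    [0.1, 0.2, 0.7],
    [0.8, 0.1, 0.1],
    [0.3, 0.4, 0.3],
])

# Loss of the same form as learn_once_cross_entropy, applied to one-hot targets.
loss = -np.mean(
    targets * np.log(predictions + EPSILON)
    + (1 - targets) * np.log(1 - predictions + EPSILON)
)

# Softmax + cross-entropy gradient, averaged over the batch as in the patch.
dC_dz2 = (predictions - targets) / predictions.shape[0]
print(loss, dC_dz2.shape)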
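
Note on the new normalization cell: the stderr block removed above ("overflow encountered in exp") is what raw 0-255 pixel intensities tend to do to sigmoid and softmax, since the pre-activations can reach magnitudes far beyond the ~709 limit of np.exp. Dividing by 255 keeps them small. A rough demonstration under assumed MNIST-like shapes and weight scales (values illustrative, not from the notebook):

import numpy as np

rng = np.random.default_rng(0)
raw_pixels = rng.integers(0, 256, size=(1, 784)).astype(float)  # one MNIST-like image
w1 = rng.normal(scale=0.1, size=(784, 64))                      # hypothetical first layer

z_raw = raw_pixels @ w1           # pre-activations from un-normalized inputs
z_norm = (raw_pixels / 255) @ w1  # same layer, inputs scaled to [0, 1]
print(np.abs(z_raw).max(), np.abs(z_norm).max())  # hundreds vs. a few units

with np.errstate(over="warn"):
    sig_raw = 1 / (1 + np.exp(-z_raw))    # may warn: overflow encountered in exp
    sig_norm = 1 / (1 + np.exp(-z_norm))  # stays well-behaved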