diff --git a/main.ipynb b/main.ipynb index e1dc11eeb0ddbb56e5a82df007b92a1d9cfd2cd6..72d5d962f2be70c0a1acfd68913a545a2b823724 100644 --- a/main.ipynb +++ b/main.ipynb @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -410,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,11 @@ " learn_once_mse,\n", " one_hot,\n", " learn_once_cross_entropy,\n", - " run_mlp_training\n", + " run_mlp_training,\n", + " learn_once_mse,\n", + " softmax,\n", + " test_mlp,\n", + " train_mlp,\n", ")" ] }, @@ -433,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -454,14 +458,14 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loss: 0.10367831888711801\n" + "Loss: 0.07218991126027921\n" ] } ], @@ -488,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -499,7 +503,7 @@ " [1., 0., 0.]])" ] }, - "execution_count": 52, + "execution_count": 42, "metadata": {}, "output_type": 
"execute_result" } @@ -517,9 +521,17 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(30,)\n" + ] + } + ], "source": [ "N = 30 # number of input data\n", "d_in = 3 # input dimension\n", @@ -533,19 +545,21 @@ "b2 = np.zeros((1, d_out)) # second layer biaises\n", "\n", "random_data = np.random.rand(N, d_in) # create a random data\n", - "random_targets = np.random.randint(1, d_out, N) # create a random targets" + "random_targets = np.random.randint(1, d_out, (N)) # create a random targets\n", + "\n", + "print(random_targets.shape)" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loss: 0.6940785845571713\n" + "Loss: 0.7075029802848043\n" ] } ], @@ -572,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -583,15 +597,127 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:210: RuntimeWarning: overflow encountered in exp\n", - " z1 = np.matmul(a0, w1) + b1 # input of the hidden layer\n" + "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:221: RuntimeWarning: overflow encountered in exp\n", + " z2 = np.matmul(a1, w2) + b2\n", + "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:118: RuntimeWarning: overflow encountered in exp\n", + " z2 = np.matmul(a1, w2) + b2\n", + "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:16: RuntimeWarning: invalid value encountered in divide\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 0.10592592592592592\n", + "Epoch 1: 0.09855555555555555\n", + "Epoch 2: 
0.10437037037037038\n", + "Epoch 3: 0.109\n", + "Epoch 4: 0.10901851851851851\n", + "Epoch 5: 0.10907407407407407\n", + "Epoch 6: 0.10044444444444445\n", + "Epoch 7: 0.0995925925925926\n", + "Epoch 8: 0.09985185185185186\n", + "Epoch 9: 0.09985185185185186\n", + "Epoch 10: 0.09985185185185186\n", + "Epoch 11: 0.09985185185185186\n", + "Epoch 12: 0.09985185185185186\n", + "Epoch 13: 0.09985185185185186\n", + "Epoch 14: 0.09985185185185186\n", + "Epoch 15: 0.09985185185185186\n", + "Epoch 16: 0.09985185185185186\n", + "Epoch 17: 0.09985185185185186\n", + "Epoch 18: 0.09985185185185186\n", + "Epoch 19: 0.09985185185185186\n", + "Epoch 20: 0.09985185185185186\n", + "Epoch 21: 0.09985185185185186\n", + "Epoch 22: 0.09985185185185186\n", + "Epoch 23: 0.09985185185185186\n", + "Epoch 24: 0.09985185185185186\n", + "Epoch 25: 0.09985185185185186\n", + "Epoch 26: 0.09985185185185186\n", + "Epoch 27: 0.09985185185185186\n", + "Epoch 28: 0.09985185185185186\n", + "Epoch 29: 0.09985185185185186\n", + "Epoch 30: 0.09985185185185186\n", + "Epoch 31: 0.09985185185185186\n", + "Epoch 32: 0.09985185185185186\n", + "Epoch 33: 0.09985185185185186\n", + "Epoch 34: 0.09985185185185186\n", + "Epoch 35: 0.09985185185185186\n", + "Epoch 36: 0.09985185185185186\n", + "Epoch 37: 0.09985185185185186\n", + "Epoch 38: 0.09985185185185186\n", + "Epoch 39: 0.09985185185185186\n", + "Epoch 40: 0.09985185185185186\n", + "Epoch 41: 0.09985185185185186\n", + "Epoch 42: 0.09985185185185186\n", + "Epoch 43: 0.09985185185185186\n", + "Epoch 44: 0.09985185185185186\n", + "Epoch 45: 0.09985185185185186\n", + "Epoch 46: 0.09985185185185186\n", + "Epoch 47: 0.09985185185185186\n", + "Epoch 48: 0.09985185185185186\n", + "Epoch 49: 0.09985185185185186\n", + "Epoch 50: 0.09985185185185186\n", + "Epoch 51: 0.09985185185185186\n", + "Epoch 52: 0.09985185185185186\n", + "Epoch 53: 0.09985185185185186\n", + "Epoch 54: 0.09985185185185186\n", + "Epoch 55: 0.09985185185185186\n", + "Epoch 56: 0.09985185185185186\n", 
+ "Epoch 57: 0.09985185185185186\n", + "Epoch 58: 0.09985185185185186\n", + "Epoch 59: 0.09985185185185186\n", + "Epoch 60: 0.09985185185185186\n", + "Epoch 61: 0.09985185185185186\n", + "Epoch 62: 0.09985185185185186\n", + "Epoch 63: 0.09985185185185186\n", + "Epoch 64: 0.09985185185185186\n", + "Epoch 65: 0.09985185185185186\n", + "Epoch 66: 0.09985185185185186\n", + "Epoch 67: 0.09985185185185186\n", + "Epoch 68: 0.09985185185185186\n", + "Epoch 69: 0.09985185185185186\n", + "Epoch 70: 0.09985185185185186\n", + "Epoch 71: 0.09985185185185186\n", + "Epoch 72: 0.09985185185185186\n", + "Epoch 73: 0.09985185185185186\n", + "Epoch 74: 0.09985185185185186\n", + "Epoch 75: 0.09985185185185186\n", + "Epoch 76: 0.09985185185185186\n", + "Epoch 77: 0.09985185185185186\n", + "Epoch 78: 0.09985185185185186\n", + "Epoch 79: 0.09985185185185186\n", + "Epoch 80: 0.09985185185185186\n", + "Epoch 81: 0.09985185185185186\n", + "Epoch 82: 0.09985185185185186\n", + "Epoch 83: 0.09985185185185186\n", + "Epoch 84: 0.09985185185185186\n", + "Epoch 85: 0.09985185185185186\n", + "Epoch 86: 0.09985185185185186\n", + "Epoch 87: 0.09985185185185186\n", + "Epoch 88: 0.09985185185185186\n", + "Epoch 89: 0.09985185185185186\n", + "Epoch 90: 0.09985185185185186\n", + "Epoch 91: 0.09985185185185186\n", + "Epoch 92: 0.09985185185185186\n", + "Epoch 93: 0.09985185185185186\n", + "Epoch 94: 0.09985185185185186\n", + "Epoch 95: 0.09985185185185186\n", + "Epoch 96: 0.09985185185185186\n", + "Epoch 97: 0.09985185185185186\n", + "Epoch 98: 0.09985185185185186\n", + "Epoch 99: 0.09985185185185186\n", + "Epoch 100: 0.09985185185185186\n", + "Test accuracy: 0.10133333333333333\n" ] } ], @@ -614,9 +740,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# 
def softmax(x: np.ndarray) -> np.ndarray:
    """Compute a numerically stable softmax along the last axis of ``x``.

    For a 1-D vector this is the usual softmax of that vector. For a 2-D
    array of logits laid out as (samples, classes) -- which is how the MLP
    code calls it with ``z2 = np.matmul(a1, w2) + b2`` -- every row is
    normalised independently, so each sample gets its own probability
    distribution over the classes.

    Args:
        x (np.ndarray): Logits, with the class scores on the last axis.

    Returns:
        np.ndarray: Array of the same shape as ``x`` whose entries along the
            last axis are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift by the per-row maximum (not the global one): the largest entry of
    # every row becomes exp(0) == 1, so exp() cannot overflow and the
    # denominator below is always >= 1 for finite input (no 0/0 -> NaN).
    shifted = x - np.max(x, axis=-1, keepdims=True)
    e_x = np.exp(shifted)
    # keepdims=True keeps the sums broadcastable against e_x row by row.
    return e_x / np.sum(e_x, axis=-1, keepdims=True)
learn_once_cross_entropy( dC_dz2 = a2 - one_hot_targets dC_dw2 = np.matmul(a1.T, dC_dz2) - dC_db2 = np.sum(dC_dz2, axis=0, keepdims=True) + dC_db2 = np.sum(dC_dz2, axis=0) dC_da1 = np.matmul(dC_dz2, w2.T) dC_dz1 = dC_da1 * a1 * (1 - a1) dC_dw1 = np.matmul(a0.T, dC_dz1) - dC_db1 = np.sum(dC_dz1, axis=0, keepdims=True) + dC_db1 = np.sum(dC_dz1, axis=0) # Update weights and biases w1 -= learning_rate * dC_dw1 @@ -204,7 +220,7 @@ def test_mlp( z1 = np.matmul(a0, w1) + b1 a1 = 1 / (1 + np.exp(-z1)) z2 = np.matmul(a1, w2) + b2 - a2 = np.exp(z2) / np.sum(np.exp(z2), axis=1, keepdims=True) + a2 = softmax(z2) predictions = a2 # Compute accuracy @@ -237,7 +253,7 @@ def run_mlp_training( """ d_in = data_train.shape[1] - d_out = labels_train.shape[0] + d_out = max(labels_train) + 1 # Random initialization of the network weights and biaises w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights diff --git a/results/mlp.png b/results/mlp.png new file mode 100644 index 0000000000000000000000000000000000000000..7adf3430932d16a46e17db300e3a1335a7211aac Binary files /dev/null and b/results/mlp.png differ