diff --git a/main.ipynb b/main.ipynb index 48b0b864ea73fc38bcf1b9b6e379a711ae0d7e12..e1dc11eeb0ddbb56e5a82df007b92a1d9cfd2cd6 100644 --- a/main.ipynb +++ b/main.ipynb @@ -14,20 +14,45 @@ "## Dataset" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### General imports" + ] + }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ + "from importlib import reload\n", "import numpy as np\n", - "import matplotlib.pyplot as plt\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "import read_cifar\n", + "reload(read_cifar)\n", "from read_cifar import read_cifar, split_dataset" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -66,6 +91,27 @@ "print(cifar_labels[0:5])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 0.9 Split" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Split dataset into training and testing with 90% training and 10% testing\n", + "train_data_09, train_labels_09, test_data_09, test_labels_09 = split_dataset(\n", + " cifar_data,\n", + " cifar_labels,\n", + " 0.9\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -75,10 +121,12 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ + "import knn\n", + "reload(knn)\n", "from knn import evaluate_knn, distance_matrix, knn_predict" ] 
}, @@ -91,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -108,7 +156,12 @@ "source": [ "example_split = 0.8\n", "example_k = 3\n", - "example_train_data, example_train_labels, example_test_data, example_test_labels = split_dataset(cifar_data, cifar_labels, example_split)\n", + "example_train_data, example_train_labels, example_test_data, example_test_labels = split_dataset(\n", + " cifar_data,\n", + " cifar_labels,\n", + " example_split\n", + ")\n", + "\n", "\n", "print(example_train_data.shape)\n", "print(example_train_labels.shape)\n", @@ -118,19 +171,25 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.33066666666666666\n" + "0.3294166666666667\n" ] } ], "source": [ - "example_accuracy = evaluate_knn(example_train_data, example_train_labels, example_test_data, example_test_labels, example_k)\n", + "example_accuracy = evaluate_knn(\n", + " example_train_data,\n", + " example_train_labels,\n", + " example_test_data,\n", + " example_test_labels,\n", + " example_k\n", + ")\n", "\n", "print(example_accuracy)" ] @@ -144,17 +203,7 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "# Split dataset into training and testing with 90% training and 10% testing\n", - "train_data_09, train_labels_09, test_data_09, test_labels_09 = split_dataset(cifar_data, cifar_labels, 0.9)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -164,33 +213,33 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "k = 1, accuracy = 0.3506666666666667\n", - "k = 2, accuracy = 0.30716666666666664\n", - "k = 3, accuracy = 0.3298333333333333\n", - "k = 4, accuracy = 
0.3328333333333333\n", - "k = 5, accuracy = 0.3323333333333333\n", - "k = 6, accuracy = 0.3383333333333333\n", - "k = 7, accuracy = 0.3401666666666667\n", - "k = 8, accuracy = 0.3395\n", - "k = 9, accuracy = 0.3416666666666667\n", - "k = 10, accuracy = 0.3388333333333333\n", - "k = 11, accuracy = 0.3416666666666667\n", - "k = 12, accuracy = 0.3426666666666667\n", - "k = 13, accuracy = 0.3435\n", - "k = 14, accuracy = 0.3405\n", - "k = 15, accuracy = 0.3395\n", - "k = 16, accuracy = 0.341\n", - "k = 17, accuracy = 0.3375\n", - "k = 18, accuracy = 0.33816666666666667\n", - "k = 19, accuracy = 0.33466666666666667\n", - "k = 20, accuracy = 0.333\n" + "k = 1, accuracy = 0.35033333333333333\n", + "k = 2, accuracy = 0.3175\n", + "k = 3, accuracy = 0.331\n", + "k = 4, accuracy = 0.3418333333333333\n", + "k = 5, accuracy = 0.346\n", + "k = 6, accuracy = 0.3393333333333333\n", + "k = 7, accuracy = 0.3405\n", + "k = 8, accuracy = 0.33866666666666667\n", + "k = 9, accuracy = 0.346\n", + "k = 10, accuracy = 0.3413333333333333\n", + "k = 11, accuracy = 0.3368333333333333\n", + "k = 12, accuracy = 0.335\n", + "k = 13, accuracy = 0.3363333333333333\n", + "k = 14, accuracy = 0.3378333333333333\n", + "k = 15, accuracy = 0.333\n", + "k = 16, accuracy = 0.33116666666666666\n", + "k = 17, accuracy = 0.33016666666666666\n", + "k = 18, accuracy = 0.3318333333333333\n", + "k = 19, accuracy = 0.3293333333333333\n", + "k = 20, accuracy = 0.32516666666666666\n" ] } ], @@ -206,12 +255,12 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "<Figure size 640x480 with 1 Axes>" ] @@ -229,12 +278,353 @@ "plt.show()\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Artificial Neural Network" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mathematical Formulation\n", + "\n", + "#### 1\n", + "\n", + "We have that 
$\\sigma(x) = \\frac{1}{1+e^{-x}}$. We can calculate the derivative of $\\sigma$ as follows:\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\sigma'(x) &= \\frac{d}{dx} \\frac{1}{1+e^{-x}} \\\\\n", + " &= \\frac{d}{dx} (1+e^{-x})^{-1} \\\\\n", + " &= -(1+e^{-x})^{-2} (-e^{-x}) \\\\\n", + " &= \\frac{e^{-x}}{(1+e^{-x})^{2}} \\\\\n", + " &= \\frac{1}{1+e^{-x}} \\frac{e^{-x}}{1+e^{-x}} \\\\\n", + " &= \\frac{1}{1+e^{-x}} \\frac{1+e^{-x}-1}{1+e^{-x}} \\\\\n", + " &= \\frac{1}{1+e^{-x}} \\left( 1 - \\frac{1}{1+e^{-x}} \\right) \\\\\n", + " &= \\sigma(x) (1 - \\sigma(x))\n", + "\\end{align}\n", + "$$\n", + "\n", + "Therefore we find that $\\sigma'(x) = \\sigma(x) (1 - \\sigma(x))$.\n", + "\n", + "#### 2\n", + "\n", + "We use mean squared error as our loss function. We have that\n", + "\n", + "$$\n", + " C = \\frac{1}{N_{out}} \\sum_{i=1}^{N_{out}} (\\hat{y}_i - y_i)^2\n", + "$$\n", + "\n", + "where $N_{out}$ is the number of output neurons, $\\hat{y}_i$ is the predicted value of the $i$-th output neuron (in our case, $\\hat{y}_i = a^{(2)}_i$) and $y_i$ is the true value of the $i$-th output neuron.\n", + "\n", + "We can express $\\frac{dC}{dA^{(2)}}$ as follows:\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{da^{(2)}_i} &= \\frac{d}{da^{(2)}_i} \\left( \\frac{1}{N_{out}} \\sum_{j=1}^{N_{out}} (a^{(2)}_j - y_j)^2 \\right) \\\\\n", + " &= \\frac{d}{da^{(2)}_i} \\left( \\frac{1}{N_{out}} ((a^{(2)}_1 - y_1)^2 + ... + (a^{(2)}_i - y_i)^2 + ... 
+ (a^{(2)}_{N_{out}} - y_{N_{out}})^2) \\right) \\\\\n", + " &= \\frac{2}{N_{out}} (a^{(2)}_i - y_i)\n", + "\\end{align}\n", + "$$\n", + "\n", + "And in vector form:\n", + "\n", + "$$\n", + " \\frac{dC}{dA^{(2)}} = \\frac{2}{N_{out}} (A^{(2)} - Y)\n", + "$$\n", + "\n", + "#### 3\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dZ^{(2)}} &= \\frac{dC}{dA^{(2)}} \\frac{dA^{(2)}}{dZ^{(2)}} \\\\\n", + " &= \\frac{dC}{dA^{(2)}} \\odot \\frac{dA^{(2)}}{dZ^{(2)}} \\qquad \\text{(element-wise product, since } \\sigma \\text{ acts on each component)} \\\\\n", + " &= \\frac{dC}{dA^{(2)}} \\odot \\frac{d}{dZ^{(2)}} \\sigma(Z^{(2)}) \\\\\n", + " &= \\frac{dC}{dA^{(2)}} \\odot \\sigma'(Z^{(2)}) \\\\\n", + " &= \\frac{dC}{dA^{(2)}} \\odot \\sigma(Z^{(2)}) \\odot (1 - \\sigma(Z^{(2)})) \\\\\n", + " &= \\frac{dC}{dA^{(2)}} \\odot A^{(2)} \\odot (1 - A^{(2)})\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 4\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dW^{(2)}} &= \\frac{dC}{dZ^{(2)}} \\frac{dZ^{(2)}}{dW^{(2)}} \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} \\frac{d}{dW^{(2)}} \\left( W^{(2)} A^{(1)} + B^{(2)} \\right) \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} A^{(1)}\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 5\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dB^{(2)}} &= \\frac{dC}{dZ^{(2)}} \\frac{dZ^{(2)}}{dB^{(2)}} \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} \\frac{d}{dB^{(2)}} \\left( W^{(2)} A^{(1)} + B^{(2)} \\right) \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} 1 \\\\\n", + " &= \\frac{dC}{dZ^{(2)}}\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 6\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dA^{(1)}} &= \\frac{dC}{dZ^{(2)}} \\frac{dZ^{(2)}}{dA^{(1)}} \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} \\frac{d}{dA^{(1)}} \\left( W^{(2)} A^{(1)} + B^{(2)} \\right) \\\\\n", + " &= \\frac{dC}{dZ^{(2)}} W^{(2)}\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 7\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dZ^{(1)}} &= \\frac{dC}{dA^{(1)}} \\frac{dA^{(1)}}{dZ^{(1)}} \\\\\n", + " &= \\frac{dC}{dA^{(1)}} \\odot A^{(1)} \\odot (1 - A^{(1)})\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 8\n", 
+ "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dW^{(1)}} &= \\frac{dC}{dZ^{(1)}} \\frac{dZ^{(1)}}{dW^{(1)}} \\\\\n", + " &= \\frac{dC}{dZ^{(1)}} A^{(0)}\n", + "\\end{align}\n", + "$$\n", + "\n", + "#### 9\n", + "\n", + "$$\n", + "\\begin{align}\n", + " \\frac{dC}{dB^{(1)}} &= \\frac{dC}{dZ^{(1)}} \\frac{dZ^{(1)}}{dB^{(1)}} \\\\\n", + " &= \\frac{dC}{dZ^{(1)}}\n", + "\\end{align}\n", + "$$\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import mlp\n", + "reload(mlp)\n", + "from mlp import (\n", + " learn_once_mse,\n", + " one_hot,\n", + " learn_once_cross_entropy,\n", + " run_mlp_training\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Learning pass example" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "N = 30 # number of input data\n", + "d_in = 3 # input dimension\n", + "d_h = 3 # number of neurons in the hidden layer\n", + "d_out = 2 # output dimension (number of neurons of the output layer)\n", + "\n", + "# Random initialization of the network weights and biases\n", + "w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights\n", + "b1 = np.zeros((1, d_h)) # first layer biases\n", + "w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights\n", + "b2 = np.zeros((1, d_out)) # second layer biases\n", + "\n", + "random_data = np.random.rand(N, d_in) # create random input data\n", + "random_targets = np.random.rand(N, d_out) # create random targets" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss: 0.10367831888711801\n" + ] + } + ], + "source": [ + "updated_w1, updated_b1, updated_w2, updated_b2, loss = learn_once_mse(\n", + " w1,\n", + " b1,\n", + " w2,\n", + " b2,\n", + " random_data,\n", + " random_targets,\n", + " 0.1,\n", + ")\n", + "\n", + 
"print(\"Loss:\", loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### One-hot encoding" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0., 1., 0.],\n", + " [0., 0., 1.],\n", + " [1., 0., 0.]])" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot(np.array([1, 2, 0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cross entropy pass example" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "N = 30 # number of input data\n", + "d_in = 3 # input dimension\n", + "d_h = 3 # number of neurons in the hidden layer\n", + "d_out = 5 # output dimension (number of neurons of the output layer)\n", + "\n", + "# Random initialization of the network weights and biaises\n", + "w1 = 2 * np.random.rand(d_in, d_h) - 1 # first layer weights\n", + "b1 = np.zeros((1, d_h)) # first layer biaises\n", + "w2 = 2 * np.random.rand(d_h, d_out) - 1 # second layer weights\n", + "b2 = np.zeros((1, d_out)) # second layer biaises\n", + "\n", + "random_data = np.random.rand(N, d_in) # create a random data\n", + "random_targets = np.random.randint(1, d_out, N) # create a random targets" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loss: 0.6940785845571713\n" + ] + } + ], + "source": [ + "cross_w1, cross_b1, cross_w2, cross_b2, cross_loss = learn_once_cross_entropy(\n", + " w1,\n", + " b1,\n", + " w2,\n", + " b2,\n", + " random_data,\n", + " random_targets,\n", + " 0.1,\n", + ")\n", + "\n", + "print(\"Loss:\", cross_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### split = 0.9, d_h = 64, learning_rate 0.1, 100 epochs" + ] + }, + { + "cell_type": "code", + 
"execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "d_h = 64\n", + "learning_rate = 0.1\n", + "num_epoch = 100" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tracert6/Documents/ECL-S9-DeepLearning/TP/TP1/mlp.py:210: RuntimeWarning: overflow encountered in exp\n", + " z1 = np.matmul(a0, w1) + b1 # input of the hidden layer\n" + ] + } + ], + "source": [ + "training_accuracy_values, test_accuracy = run_mlp_training(\n", + " train_data_09,\n", + " train_labels_09,\n", + " test_data_09,\n", + " test_labels_09,\n", + " d_h,\n", + " learning_rate,\n", + " num_epoch\n", + ")\n", + "\n", + "for (i, training_accuracy) in enumerate(training_accuracy_values):\n", + " print(f\"Epoch {i}: {training_accuracy}\")\n", + "\n", + "print(f\"Test accuracy: {test_accuracy}\")" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Plot the training accuracy values\n", + "plt.plot(range(num_epoch + 1), training_accuracy_values)\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Training Accuracy\")\n", + "plt.savefig('./results/mlp.png')\n", + "plt.show()\n" + ] } ], "metadata": {