diff --git a/README.md b/README.md index d4d493fbce464f0b2ff4269445b7e4e9c340ba66..eb5dde9020f334dd53705715679bc3a394f20241 100644 --- a/README.md +++ b/README.md @@ -105,10 +105,11 @@ Video on [Binary Tree](https://www.youtube.com/watch?v=pkYVOmU3MgA&t=11510s) 📝 Assignment 1 - Analyzing a dataset -### Exam (december) - -- Final exam +### Exam (december 5th) +- Final written exam +- 2h +- No document ## Books and ressources diff --git a/figures/Logo_ECL.png b/figures/Logo_ECL.png deleted file mode 100644 index 00b57d6ca30841a9eb9360ded009fad011f38f6e..0000000000000000000000000000000000000000 Binary files a/figures/Logo_ECL.png and /dev/null differ diff --git a/figures/a-series-paper-sizes-1.jpg b/figures/a-series-paper-sizes-1.jpg deleted file mode 100755 index 76178c439fc92ea68d321743f7536bc527cf2310..0000000000000000000000000000000000000000 Binary files a/figures/a-series-paper-sizes-1.jpg and /dev/null differ diff --git a/figures/big-o-chart.png b/figures/big-o-chart.png deleted file mode 100644 index b9a195e17ecdc9cadadf1c1d6a12ffda9eff4c2c..0000000000000000000000000000000000000000 Binary files a/figures/big-o-chart.png and /dev/null differ diff --git a/figures/flowchart.png b/figures/flowchart.png deleted file mode 100644 index c85c4e20f8f7ed2c2f9fe26fb91fbdd845b47437..0000000000000000000000000000000000000000 Binary files a/figures/flowchart.png and /dev/null differ diff --git a/figures/logo-ecl.png b/figures/logo-ecl.png deleted file mode 100644 index 64dbe6a8dbcc291560a7d01f352614c0eafb5cba..0000000000000000000000000000000000000000 Binary files a/figures/logo-ecl.png and /dev/null differ diff --git a/figures/logo-emlyon.png b/figures/logo-emlyon.png deleted file mode 100644 index d55b2f66777a0e61adeeecf372e08e414f9353e0..0000000000000000000000000000000000000000 Binary files a/figures/logo-emlyon.png and /dev/null differ diff --git a/figures/xkcd_fixing_problems.png b/figures/xkcd_fixing_problems.png deleted file mode 100644 index 
67d1fce7b1617bc9ba438cdbb7184647a9a9a7ba..0000000000000000000000000000000000000000 Binary files a/figures/xkcd_fixing_problems.png and /dev/null differ diff --git a/lectures/08-binary-trees slides.pdf b/lectures/08-binary-trees slides.pdf index 5db8adba5cb9b9ca412ea71a3bbe3eb22c60100f..e3205b78a3cceb5ad61261c40580677e5349d71b 100644 Binary files a/lectures/08-binary-trees slides.pdf and b/lectures/08-binary-trees slides.pdf differ diff --git a/lectures/09-binary-trees-traversals slides.pdf b/lectures/09-binary-trees-traversals slides.pdf index 1134dbd6ac9cfcd63963302af29abfe23aa2cee5..0290dbe2c9c6086029662c8fe663d53cf4ee2632 100644 Binary files a/lectures/09-binary-trees-traversals slides.pdf and b/lectures/09-binary-trees-traversals slides.pdf differ diff --git a/lectures/10-trees slides.pdf b/lectures/10-trees slides.pdf index 045ef4c97789f48f5e0007aa83e0682d3c59b1ee..ffe565a7777995c02e999beec0a90289698ebcd9 100644 Binary files a/lectures/10-trees slides.pdf and b/lectures/10-trees slides.pdf differ diff --git a/lectures/11-graphs slides.pdf b/lectures/11-graphs slides.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dd255643c69bc7f0bbb7f471dbfef32328d730e0 Binary files /dev/null and b/lectures/11-graphs slides.pdf differ diff --git a/notebooks/08-binary-trees.ipynb b/notebooks/08-binary-trees.ipynb index 0c2e36db398e732613a3a72c9fbdee871f51762f..d4d90d26415f3e39bc98129501b221917e50f636 100644 --- a/notebooks/08-binary-trees.ipynb +++ b/notebooks/08-binary-trees.ipynb @@ -24,6 +24,24 @@ "---" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "93dd9eb2", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "\n", + "#sys.path.append(\"../../\")\n", + "from utils import draw_directed_graph" + ] + }, { "cell_type": "markdown", "id": "f3ebe7d2", @@ -51,18 +69,12 @@ "source": [ "## Definitions\n", "\n", - "> Tree is a hierarchical data structure with 
nodes connected by edges\n", + "> A **Tree** is a hierarchical data structure with nodes (vertices) connected by links (edges)\n", "\n", "- A non-linear data structures (multiple ways to traverse it)\n", "- Nodes are connected by only one path (a series of edges) so trees have no cycle\n", "- Edges are also called links, they can be traversed in both ways (no orientation)\n", - "\n", - "We focus on _binary trees._\n", - "\n", - "> Trees that have at most two children\n", - "\n", - "- Children can be ordered left child and the right child\n", - "\n" + "- Trees are most commonly represented as a node-link diagram, with the root at the top and the leaves (nodes without children) at the bottom." ] }, { @@ -74,14 +86,20 @@ } }, "source": [ - "## Binary trees representation\n", + "## Binary trees\n", + "\n", + "We focus on _binary trees._\n", "\n", - "Trees are most commonly represented as a node-lin diagram, with the root at the top and the leaves (nodes without children) at the bottom)." + "> Trees in which each node has at most two children\n", + "\n", + "- Children are ordered (left and right)\n", + "\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 278, + "execution_count": 4, "id": "51f0cf57", "metadata": { "slideshow": { @@ -98,69 +116,45 @@ "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", " -->\n", "<!-- Pages: 1 -->\n", - "<svg width=\"212pt\" height=\"188pt\"\n", - " viewBox=\"0.00 0.00 212.19 188.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-184 208.19,-184 208.19,4 -4,4\"/>\n", - "<!-- ROOT -->\n", + "<svg width=\"134pt\" height=\"116pt\"\n", + " viewBox=\"0.00 0.00 134.00 116.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", + "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 
112)\">\n", + "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-112 130,-112 130,4 -4,4\"/>\n", + "<!-- A -->\n", "<g id=\"node1\" class=\"node\">\n", - "<title>ROOT</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"121.8\" cy=\"-162\" rx=\"34.39\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"121.8\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">ROOT</text>\n", + "<title>A</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"63\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"63\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">A</text>\n", "</g>\n", - "<!-- Node 1 -->\n", + "<!-- B -->\n", "<g id=\"node2\" class=\"node\">\n", - "<title>Node 1</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"76.8\" cy=\"-90\" rx=\"36.29\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"76.8\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">Node 1</text>\n", + "<title>B</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">B</text>\n", "</g>\n", - "<!-- ROOT->Node 1 -->\n", + "<!-- A->B -->\n", "<g id=\"edge1\" class=\"edge\">\n", - "<title>ROOT->Node 1</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M111.13,-144.41C105.86,-136.22 99.39,-126.14 93.48,-116.95\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"96.56,-115.26 88.2,-108.74 90.67,-119.05 96.56,-115.26\"/>\n", + "<title>A->B</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M54.65,-72.76C50.42,-64.55 45.19,-54.37 40.42,-45.09\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"43.68,-43.79 36,-36.49 37.46,-46.99 43.68,-43.79\"/>\n", "</g>\n", - "<!-- Node 2 -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>Node 2</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"167.8\" cy=\"-90\" rx=\"36.29\" ry=\"18\"/>\n", - 
"<text text-anchor=\"middle\" x=\"167.8\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">Node 2</text>\n", - "</g>\n", - "<!-- ROOT->Node 2 -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>ROOT->Node 2</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M132.47,-144.76C137.85,-136.58 144.5,-126.45 150.58,-117.2\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"153.47,-119.18 156.04,-108.9 147.62,-115.33 153.47,-119.18\"/>\n", - "</g>\n", - "<!-- Leaf 1 -->\n", + "<!-- C -->\n", "<g id=\"node3\" class=\"node\">\n", - "<title>Leaf 1</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"33.8\" cy=\"-18\" rx=\"33.6\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"33.8\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf 1</text>\n", + "<title>C</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">C</text>\n", "</g>\n", - "<!-- Node 1->Leaf 1 -->\n", + "<!-- A->C -->\n", "<g id=\"edge2\" class=\"edge\">\n", - "<title>Node 1->Leaf 1</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M66.61,-72.41C61.63,-64.3 55.51,-54.35 49.92,-45.25\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"52.92,-43.45 44.71,-36.76 46.96,-47.11 52.92,-43.45\"/>\n", - "</g>\n", - "<!-- Leaf 2 -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>Leaf 2</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"119.8\" cy=\"-18\" rx=\"33.6\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"119.8\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">Leaf 2</text>\n", - "</g>\n", - "<!-- Node 1->Leaf 2 -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>Node 1->Leaf 2</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M86.99,-72.41C91.97,-64.3 98.08,-54.35 103.67,-45.25\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"106.64,-47.11 
108.89,-36.76 100.67,-43.45 106.64,-47.11\"/>\n", + "<title>A->C</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M71.35,-72.76C75.58,-64.55 80.81,-54.37 85.58,-45.09\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"88.54,-46.99 90,-36.49 82.32,-43.79 88.54,-46.99\"/>\n", "</g>\n", "</g>\n", "</svg>\n" ], "text/plain": [ - "<graphviz.graphs.Digraph at 0x104c33bb0>" + "<graphviz.graphs.Digraph at 0x1040b0f40>" ] }, "metadata": {}, @@ -168,7 +162,11 @@ } ], "source": [ - "draw_binary_tree(binary_tree)" + "T = {\n", + " 'A': ['B', 'C'],\n", + "}\n", + "\n", + "draw_directed_graph(T)" ] }, { @@ -208,14 +206,14 @@ } }, "source": [ - "## Binary trees data structures (dictionnaries and lists)\n", + "## Binary trees data structures (dict + lists)\n", "\n", "_Binary trees using dictionnaries where nodes are keys and edges are Lists._" ] }, { "cell_type": "code", - "execution_count": 199, + "execution_count": 5, "id": "d495c8a5", "metadata": {}, "outputs": [], @@ -243,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 200, + "execution_count": 6, "id": "5df1c518", "metadata": {}, "outputs": [], @@ -263,7 +261,7 @@ }, { "cell_type": "code", - "execution_count": 201, + "execution_count": 7, "id": "abc855b1", "metadata": {}, "outputs": [], @@ -329,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 254, + "execution_count": 8, "id": "1fbb1c2f", "metadata": { "slideshow": { @@ -347,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 255, + "execution_count": 9, "id": "6b4492dc", "metadata": {}, "outputs": [ @@ -357,7 +355,7 @@ "'A'" ] }, - "execution_count": 255, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 10, "id": "ea01e802", "metadata": {}, "outputs": [], @@ -387,14 +385,14 @@ } }, "source": [ - "### Get the list of nodes\n", + "### Get all nodes from a Tree\n", "\n", "_Return all the nodes in the tree (as a list 
of nodes names)._" ] }, { "cell_type": "code", - "execution_count": 205, + "execution_count": 11, "id": "3af082b7", "metadata": { "slideshow": { @@ -409,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": 206, + "execution_count": 12, "id": "ede5b5f4", "metadata": {}, "outputs": [ @@ -419,7 +417,7 @@ "['A', 'B', 'C', 'D', 'E']" ] }, - "execution_count": 206, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -430,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 208, + "execution_count": 13, "id": "2d3305d5", "metadata": {}, "outputs": [], @@ -448,14 +446,14 @@ } }, "source": [ - "### Get the list of edges\n", + "### Get all links from a Tree\n", "\n", - "_Return all the edges as a list of pairs as `Tuple`._" + "_Return all the links as a list of pairs as `Tuple`._" ] }, { "cell_type": "code", - "execution_count": 209, + "execution_count": 14, "id": "b50fe9c2", "metadata": { "slideshow": { @@ -464,17 +462,17 @@ }, "outputs": [], "source": [ - "def get_edges(graph):\n", - " edges = []\n", - " for node, neighbors in graph.items():\n", + "def get_links(tree):\n", + " links = []\n", + " for node, neighbors in tree.items():\n", " for neighbor in neighbors:\n", - " edges.append((node, neighbor))\n", - " return edges" + " links.append((node, neighbor))\n", + " return links" ] }, { "cell_type": "code", - "execution_count": 210, + "execution_count": 15, "id": "8958bd83", "metadata": {}, "outputs": [ @@ -484,24 +482,24 @@ "[('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E')]" ] }, - "execution_count": 210, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "get_edges(T)" + "get_links(T)" ] }, { "cell_type": "code", - "execution_count": 211, + "execution_count": 16, "id": "30dd31d3", "metadata": {}, "outputs": [], "source": [ - "assert get_edges(T) == [('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E')]\n", - "assert get_edges({}) == []" + "assert get_links(T) == [('A', 'B'), ('A', 'C'), ('B', 'D'), 
('B', 'E')]\n", + "assert get_links({}) == []" ] }, { @@ -520,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 217, + "execution_count": 17, "id": "37fbe31b", "metadata": { "slideshow": { @@ -529,8 +527,8 @@ }, "outputs": [], "source": [ - "def get_parent(graph, node_to_find):\n", - " for parent, neighbors in graph.items():\n", + "def get_parent(tree, node_to_find):\n", + " for parent, neighbors in tree.items():\n", " if node_to_find in neighbors:\n", " return parent\n", " return None" @@ -538,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 218, + "execution_count": 18, "id": "78e88d23", "metadata": {}, "outputs": [], @@ -553,18 +551,18 @@ "id": "3fb6f347", "metadata": { "slideshow": { - "slide_type": "fragment" + "slide_type": "subslide" } }, "source": [ "### Check if the node is the root\n", "\n", - "_Return True if the root not, else `None`." + "_Return `True` if the node is the root, else `False`._" ] }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 19, "id": "164e4ef7", "metadata": { "slideshow": { @@ -574,12 +572,12 @@ "outputs": [], "source": [ "def is_root(T, node):\n", - " return find_parent(T, node) is None" + " return get_parent(T, node) is None" ] }, { "cell_type": "code", - "execution_count": 227, + "execution_count": 20, "id": "5c053617", "metadata": {}, "outputs": [], @@ -603,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 228, + "execution_count": 21, "id": "ac145c20", "metadata": { "slideshow": { @@ -619,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": 22, "id": "9444a66f", "metadata": {}, "outputs": [], @@ -645,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 23, "id": "77f5f17c", "metadata": { "slideshow": { @@ -660,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 234, + "execution_count": 24, "id": "5f5078d6", "metadata": {}, "outputs": [], @@ -689,7 +687,7 @@ }, { "cell_type": "code", - "execution_count": 267, + 
"execution_count": 25, "id": "c9312a43", "metadata": { "slideshow": { @@ -714,10 +712,21 @@ }, { "cell_type": "code", - "execution_count": 268, + "execution_count": 26, "id": "38181a0f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'A': ['F']}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "U = {\"A\": []}\n", "add_node(U, \"A\", 'F')\n", @@ -743,38 +752,7 @@ }, { "cell_type": "code", - "execution_count": 295, - "id": "ee2973b7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'A': ['B', 'C'], 'B': ['D', 'E'], 'C': [], 'D': [], 'E': []}" - ] - }, - "execution_count": 295, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "T" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b7ef1fab", - "metadata": {}, - "outputs": [], - "source": [ - "v" - ] - }, - { - "cell_type": "code", - "execution_count": 291, + "execution_count": 27, "id": "6ef9af29", "metadata": { "slideshow": { @@ -797,7 +775,7 @@ }, { "cell_type": "code", - "execution_count": 292, + "execution_count": 28, "id": "44a54ec8", "metadata": {}, "outputs": [], @@ -853,7 +831,9 @@ "\n", "In a complete or balanced binary tree: \n", "- if the index of a node is equal to $i$, then the position indicating its left child is at $2i$, \n", - "- and the position indicating its right child is at $2i + 1$." + "- and the position indicating its right child is at $2i + 1$.\n", + "\n", + "Also works for ternary trees, etc." 
] }, { @@ -870,7 +850,7 @@ }, { "cell_type": "code", - "execution_count": 275, + "execution_count": 29, "id": "610ad3bb", "metadata": {}, "outputs": [ @@ -957,10 +937,10 @@ "</svg>\n" ], "text/plain": [ - "<graphviz.graphs.Digraph at 0x104c32b30>" + "<graphviz.graphs.Digraph at 0x1040b1000>" ] }, - "execution_count": 275, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -986,55 +966,8 @@ "dot.edge('0', '2', color='red')\n", "dot.edge('2', '3', color='red')\n", "\n", - "\n", - "dot # Render the graph" - ] - }, - { - "cell_type": "markdown", - "id": "01880f1d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Visualize a tree" - ] - }, - { - "cell_type": "code", - "execution_count": 296, - "id": "3a064bfb", - "metadata": {}, - "outputs": [], - "source": [ - "from graphviz import Digraph\n", - "from IPython.display import display\n", - "\n", - "def draw_binary_tree(tree_dict):\n", - " dot = Digraph(format='png')\n", - " \n", - " def add_nodes_and_edges(node, parent_name=None):\n", - " if isinstance(node, dict):\n", - " for key, value in node.items():\n", - " dot.node(key, key)\n", - " if parent_name:\n", - " dot.edge(parent_name, key)\n", - " add_nodes_and_edges(value, key)\n", - "\n", - " add_nodes_and_edges(tree_dict)\n", - " \n", - " display(dot)" + "dot # render" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb7726ee", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/09-binary-trees-traversals.ipynb b/notebooks/09-binary-trees-traversals.ipynb index 7ba83bef2fe0234932750be0fdf0d8c07c16c86c..7a5bf682d4771ec18edcd9a45057b74ad5ecd700 100644 --- a/notebooks/09-binary-trees-traversals.ipynb +++ b/notebooks/09-binary-trees-traversals.ipynb @@ -28,6 +28,20 @@ "---" ] }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c30e8f1b", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from 
graphviz import Digraph\n", + "from IPython.display import display\n", + "from utils import draw_binary_tree" + ] + }, { "cell_type": "markdown", "id": "6efad77c", @@ -53,11 +67,13 @@ } }, "source": [ - "## Traversal methods\n", + "## Binary trees traversal methods\n", "\n", - "> Methodes to explore and process all the nodes in a binary tree\n", + "> Methods to explore and process nodes in a tree (or a graph).\n", "\n", - "- Because Trees are non-linear, there are multiple possible paths" + "- Because Trees are non-linear, there are multiple possible paths\n", + "- Can be applied to the whole tree or until a certain condition is met\n", + "- Traversal methods will provide very different results" ] }, { @@ -73,13 +89,14 @@ "\n", "<img src=\"figures/arbre-largeur-hauteur.png\" style=\"width: 400px\">\n", "\n", - "1. **Depth-First Traversal (DFS):**\n", + "1. **Depth-First search (DFS):**\n", " - visiting a node (sarting with the root)\n", " - then recursively traversing as deep as possible \n", " - then explore another branch.\n", "\n", - "2. **Breadth-First Traversal (BFS):**\n", - " - visiting a node (sarting with the root)\n", + "\n", + "2. **Breadth-First search (BFS):**\n", + " - visiting a node (starting with the root)\n", " - explore all its neighbors (children) \n", " - then mode move to the children." ] @@ -93,52 +110,75 @@ } }, "source": [ - "## Depth-first\n", + "## Depth-first search (or traversal)\n", "\n", - "**Pseudo-code for Depth-First Traversal:**\n", + "> **Depth-first search (DFS)** is a traversal method that explores each branch as deep as possible before backtracking, in a tree (or a graph).\n", "\n", - "1. Place the source node in the **stack**.\n", + "1. Place the source node in a **stack**.\n", "2. Remove the node from the top of the stack for processing.\n", "3. Add all unexplored neighbors to the stack (at the top).\n", "4. 
If the stack is not empty, go back to step 2.\n" ] }, + { + "cell_type": "markdown", + "id": "6e0cf5f3", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Depth-first search (or traversal)" + ] + }, { "cell_type": "code", - "execution_count": 60, - "id": "41790c1d", + "execution_count": 25, + "id": "e6d32fe3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A\n", - "B\n", - "D\n", - "E\n", - "F\n", - "C\n" - ] - } - ], + "outputs": [], "source": [ - "def dfs(graph, start):\n", + "def dfs(tree, start):\n", " stack = [start]\n", " while stack:\n", " vertex = stack.pop()\n", - " print(vertex) # traitement\n", - " stack.extend(graph[vertex])\n", - "\n", - "graph = {'A': set(['B', 'C']),\n", + " print(vertex, end = ' ') # traitement\n", + " stack.extend(tree[vertex])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "c49f305d", + "metadata": {}, + "outputs": [], + "source": [ + "tree = {'A': set(['B', 'C']),\n", " 'B': set(['D', 'E', 'F']),\n", " 'C': set([]),\n", " 'D': set([]),\n", " 'E': set([]),\n", " 'F': set([])\n", - " }\n", - "\n", - "dfs(graph, 'A') # A B D F E C" + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b825ca4a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A B F E D C " + ] + } + ], + "source": [ + "dfs(tree, 'A') # A B D F E C" ] }, { @@ -150,15 +190,15 @@ } }, "source": [ - "## Depth-first traversal: pre-order, in-order, and post-order.\n", + "## Depth-first search: pre-order, in-order, and post-order.\n", "\n", - "For **depth-first traversal**, there are different types of processing: *pre-order*, *in-order*, and *post-order*.\n", + "For **depth-first search**, there are different types of processing: *pre-order*, *in-order*, and *post-order*, based on when the processing is done (before/after exploring the root or the children). 
Notation :\n", "\n", "- R = Root\n", "- D = Right subtree\n", "- G = Left subtree\n", "\n", - "There are three (main) types of traversal, observing the position of R:\n", + "There are three (main) types of traversal:\n", "\n", "- **Pre-order**: R G D\n", "- **In-order**: G R D\n", @@ -176,26 +216,26 @@ "source": [ "## Depth-first traversal: pre-order, in-order, and post-order.\n", "\n", - "For **depth-first traversal**, there are different types of processing: *pre-order*, *post-order*, and *in-order*.\n", + "Implementation of the strategies:\n", "\n", "```python\n", - "def Preorder(R):\n", + "def preorder(R):\n", " if not empty(R):\n", - " process(R) # Root\n", - " Preorder(left(R)) # Left\n", - " Preorder(right(R)) # Right\n", + " process(R) # Root\n", + " preorder(left(R)) # Left\n", + " preorder(right(R)) # Right\n", "\n", - "def Inorder(R):\n", + "def inorder(R):\n", " if not empty(R):\n", - " Inorder(left(R)) # Left\n", - " process(R) # Root\n", - " Inorder(right(R)) # Right\n", + " inorder(left(R)) # Left\n", + " process(R) # Root\n", + " inorder(right(R)) # Right\n", "\n", - "def Postorder(R):\n", + "def postorder(R):\n", " if not empty(R):\n", - " Postorder(left(R)) # Left\n", - " Postorder(right(R)) # Right\n", - " process(R) # Rooot\n" + " postorder(left(R)) # Left\n", + " postorder(right(R)) # Right\n", + " process(R) # Root\n" ] }, { @@ -207,138 +247,64 @@ } }, "source": [ - "## Depth-first traversal: pre-order\n", - "_Iterative implementation._" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "b0d812d9", - "metadata": {}, - "outputs": [], - "source": [ - "def iterative_inorder_traversal(root):\n", - " stack = []\n", - " current = root\n", - " while current is not None or stack:\n", - " while current is not None:\n", - " stack.append(current)\n", - " current = current.left\n", - " current = stack.pop()\n", - " print(current.value)\n", - " current = current.right" - ] - }, - { - "cell_type": "markdown", - "id": "f368960e", - 
"metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first traversal: pre-order\n", - "_Recursive implementation._" + "## Example\n", + "\n", + "We will use this data structure" ] }, { "cell_type": "code", - "execution_count": 114, - "id": "552dc46b", + "execution_count": 28, + "id": "c0785d33", "metadata": {}, "outputs": [], "source": [ - "TT = {\"dog\": [\"little\", \"very\"],\n", - " \"little\": [\"the\"],\n", - " \"the\": [],\n", - " \"very\": [\"is\", \"cute\"],\n", - " \"is\": [],\n", - " \"cute\": []\n", - " }" + "class Node:\n", + " def __init__(self, value):\n", + " self.value = value\n", + " self.left = None\n", + " self.right = None\n", + " \n", + " def get_value(self):\n", + " return self.value\n", + " \n", + " def set_value(self, v = None):\n", + " self.value = v" ] }, { "cell_type": "code", - "execution_count": 117, - "id": "2cafcf1c", + "execution_count": 29, + "id": "ffcda583", "metadata": {}, "outputs": [], "source": [ - "def preorder(T, node):\n", - " if node is not None:\n", - " if len(T[node]) > 0:\n", - " preorder(T, T[node][0])\n", - " print(node)\n", - " if len(T[node]) > 1:\n", - " preorder(T, T[node][1])" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "id": "8b8f5c52", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the\n", - "little\n", - "dog\n", - "is\n", - "very\n", - "cute\n" - ] - } - ], - "source": [ - "preorder(TT, \"dog\")" + "root = Node(\"dog\")\n", + "root.left = Node(\"little\")\n", + "root.left.left = Node(\"the\")\n", + "root.right = Node(\"very\")\n", + "root.right.left = Node(\"is\")\n", + "root.right.right = Node(\"cute\")" ] }, { - "cell_type": "code", - "execution_count": 68, - "id": "fbbb9408", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the\n", - "little\n", - "dog\n", - "is\n", - "very\n", - "cute\n" - ] + "cell_type": "markdown", + "id": "5389c181", + 
"metadata": { + "slideshow": { + "slide_type": "subslide" } - ], + }, "source": [ - "def preorder_traversal(node):\n", - " if node is not None:\n", - " if node.left:\n", - " preorder_traversal(node.left)\n", - " print(node.value)\n", - " if node.right:\n", - " preorder_traversal(node.right)\n", + "## Example\n", "\n", - "root = Node(\"dog\")\n", - "root.left = Node(\"little\")\n", - "root.left.left = Node(\"the\")\n", - "root.right = Node(\"very\")\n", - "root.right.left = Node(\"is\")\n", - "root.right.right = Node(\"cute\")\n", - "preorder_traversal(root)" + "_How to get the sentence in the correct order?_" ] }, { "cell_type": "code", - "execution_count": 69, - "id": "72f665f2", + "execution_count": 30, + "id": "8cf38ed1", "metadata": {}, "outputs": [ { @@ -424,147 +390,337 @@ "</svg>\n" ], "text/plain": [ - "<graphviz.graphs.Digraph at 0x1045bead0>" + "<graphviz.graphs.Digraph at 0x106e7ec50>" ] }, - "execution_count": 69, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "visualize_oop(root)" + "draw_binary_tree(root)" ] }, { "cell_type": "markdown", - "id": "c88e5f58", + "id": "93045f0c", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - "## Breadth-first traversal\n", - "\n", - "_Visit all the nodes in a tree or graph level by level._\n", - "\n", - "\n", - "```\n", - " 1\n", - " / \\\n", - " 2 3\n", - " / \\\n", - " 4 5\n", - "```" + "## Depth-first traversal pre-order (OOP + iterative)" ] }, { "cell_type": "code", - "execution_count": 81, - "id": "73ceb6d8", - "metadata": {}, + "execution_count": 31, + "id": "b0d812d9", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, "outputs": [], "source": [ - "def bfs_print(node):\n", - " if node is None:\n", - " return\n", - "\n", - " queue = [node]\n", - "\n", - " while queue:\n", - " current_node = queue.pop(0)\n", - " print(current_node.value, end=' ')\n", - "\n", - " if current_node.left:\n", - " 
queue.append(current_node.left)\n", - "\n", + "def iterative_inorder_traversal(node):\n", + " stack = [node]\n", + " while stack:\n", + " current_node = stack.pop()\n", + " print(current_node.value)\n", " if current_node.right:\n", - " queue.append(current_node.right)" + " stack.append(current_node.right)\n", + " if current_node.left:\n", + " stack.append(current_node.left)" ] }, { "cell_type": "code", - "execution_count": 82, - "id": "1e1a1f21", + "execution_count": 32, + "id": "b2dc9113", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "1 2 3 4 5 " + "dog\n", + "little\n", + "the\n", + "very\n", + "is\n", + "cute\n" ] } ], "source": [ - "root = Node(1)\n", - "root.left = Node(2)\n", - "root.right = Node(3)\n", - "root.left.left = Node(4)\n", - "root.left.right = Node(5)\n", - "bfs_print(root)" + "iterative_inorder_traversal(root)" ] }, { "cell_type": "markdown", - "id": "7ba7af33", + "id": "f368960e", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - "## Utils" + "## Depth-first traversal pre-order (dict + recursive)\n", + "_Recursive implementation using a dictionnary data structure._" ] }, { "cell_type": "code", - "execution_count": 83, - "id": "2dc5a89d", + "execution_count": 33, + "id": "552dc46b", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "TT = {\"dog\": [\"little\", \"very\"],\n", + " \"little\": [\"the\"],\n", + " \"the\": [],\n", + " \"very\": [\"is\", \"cute\"],\n", + " \"is\": [],\n", + " \"cute\": []\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "2cafcf1c", "metadata": {}, "outputs": [], "source": [ - "import graphviz\n", - "from graphviz import Digraph\n", - "from IPython.display import display" + "def preorder(T, node):\n", + " if node is not None:\n", + " print(node)\n", + " if len(T[node]) > 0:\n", + " preorder(T, T[node][0])\n", + " if len(T[node]) > 1:\n", + " preorder(T, T[node][1])" ] }, { 
"cell_type": "code", - "execution_count": 84, - "id": "7058a231", + "execution_count": 35, + "id": "8b8f5c52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dog\n", + "little\n", + "the\n", + "very\n", + "is\n", + "cute\n" + ] + } + ], + "source": [ + "preorder(TT, \"dog\")" + ] + }, + { + "cell_type": "markdown", + "id": "75c93b72", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "_Iterative version._" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "fbbb9408", "metadata": {}, "outputs": [], "source": [ - "def visualize_oop(root):\n", - " def build(node, dot=None):\n", - " if dot is None:\n", - " dot = graphviz.Digraph(format='png')\n", - "\n", - " if node is not None:\n", - " dot.node(str(node.value))\n", + "def preorder_traversal(T, node):\n", + " stack = [node]\n", + " \n", + " while stack:\n", + " current_node = stack.pop()\n", + " print(current_node)\n", + " \n", + " if len(T[current_node]) > 1:\n", + " stack.append(T[current_node][1])\n", + " \n", + " if len(T[current_node]) > 0:\n", + " stack.append(T[current_node][0])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "f040425d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dog\n", + "little\n", + "the\n", + "very\n", + "is\n", + "cute\n" + ] + } + ], + "source": [ + "preorder_traversal(TT, \"dog\")" + ] + }, + { + "cell_type": "markdown", + "id": "1aeacaad", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Solution: inorder traversal" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "8a950f0f", + "metadata": {}, + "outputs": [], + "source": [ + "def inorder(T, node):\n", + " if node is not None:\n", + " if len(T[node]) > 0:\n", + " inorder(T, T[node][0])\n", + " print(node)\n", + " if len(T[node]) > 1:\n", + " inorder(T, T[node][1])" + ] + }, + { + "cell_type": 
"code", + "execution_count": 39, + "id": "49625d32", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the\n", + "little\n", + "dog\n", + "is\n", + "very\n", + "cute\n" + ] + } + ], + "source": [ + "inorder(TT, \"dog\")" + ] + }, + { + "cell_type": "markdown", + "id": "c88e5f58", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Breadth-first search (or traversal)\n", "\n", - " if node.left is not None:\n", - " dot.edge(str(node.value), str(node.left.value))\n", - " build(node.left, dot)\n", + "> **Breadth-first search (BFS)** is a traversal method that visits all the nodes in a tree (or a graph) level by level.\n", "\n", - " if node.right is not None:\n", - " dot.edge(str(node.value), str(node.right.value))\n", - " build(node.right, dot)\n", "\n", - " return dot\n", + "```\n", + " 1\n", + " / \\\n", + " 2 3\n", + " / \\\n", + " 4 5\n", + "```\n", "\n", - " return build(root)\n" + "The main difference will be that we use a Queue instead of a Stack" ] }, { "cell_type": "code", - "execution_count": null, - "id": "05b69326", + "execution_count": 40, + "id": "73ceb6d8", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def bfs_print(node):\n", + " if node is None:\n", + " return\n", + "\n", + " queue = [node]\n", + "\n", + " while queue:\n", + " current_node = queue.pop(0)\n", + " print(current_node.value, end=' ')\n", + "\n", + " if current_node.left:\n", + " queue.append(current_node.left)\n", + "\n", + " if current_node.right:\n", + " queue.append(current_node.right)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "1e1a1f21", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "root = Node(1)\n", + "root.left = Node(2)\n", + "root.right = Node(3)\n", + "root.left.left = Node(4)\n", + "root.left.right = Node(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "74a2317d", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 2 3 4 5 " + ] + } + ], + "source": [ + "bfs_print(root)" + ] } ], "metadata": { diff --git a/notebooks/10-trees.ipynb b/notebooks/10-trees.ipynb index 5f29e68ab317339af079d125a68e01aa6770eeeb..b7ce4b33120817b4487e0fc8fbc1ca18c9ad06df 100644 --- a/notebooks/10-trees.ipynb +++ b/notebooks/10-trees.ipynb @@ -24,6 +24,20 @@ "---" ] }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a9f48d96", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from graphviz import Digraph\n", + "from IPython.display import display\n", + "from utils import draw_tree_dict" + ] + }, { "cell_type": "markdown", "id": "f3ebe7d2", @@ -96,7 +110,7 @@ }, { "cell_type": "markdown", - "id": "ce722126", + "id": "67f767cf", "metadata": { "slideshow": { "slide_type": "subslide" @@ -118,7 +132,7 @@ }, { "cell_type": "markdown", - "id": "0612be20", + "id": "bb8e8697", "metadata": { "slideshow": { "slide_type": "subslide" @@ -132,12 +146,12 @@ }, { "cell_type": "code", - "execution_count": 147, - "id": "b66a9451", + "execution_count": 26, + "id": "53891826", "metadata": {}, "outputs": [], "source": [ - "tree = {\n", + "tree_dict = {\n", " \"a\": [\"b\", \"c\"],\n", " \"b\": [\"d\", \"e\"],\n", " \"c\": [\"f\"],\n", @@ -149,8 +163,8 @@ }, { "cell_type": "code", - "execution_count": 148, - "id": "f2bce0eb", + "execution_count": 27, + "id": "cf1c0607", "metadata": {}, "outputs": [ { @@ -236,7 +250,7 @@ "</svg>\n" ], "text/plain": [ - "<graphviz.graphs.Digraph at 0x1106595a0>" + "<graphviz.graphs.Digraph at 0x103f43d00>" ] }, "metadata": {}, @@ -244,12 +258,12 @@ } ], "source": [ - "draw_tree(tree)" + "draw_tree_dict(tree_dict)" ] }, { "cell_type": "markdown", - "id": "0aa22e17", + "id": "4f833d5f", "metadata": { "slideshow": { "slide_type": "subslide" @@ -263,12 +277,12 @@ }, { "cell_type": "code", - "execution_count": 168, - "id": "16070d37", + 
"execution_count": 28, + "id": "15a30278", "metadata": {}, "outputs": [], "source": [ - "tree = {\n", + "tree_dict_name = {\n", " \"a\": {\"neighbors\": [\"b\", \"c\"]},\n", " \"b\": {\"neighbors\": [\"d\", \"e\"]},\n", " \"c\": {\"neighbors\": [\"f\"]},\n", @@ -280,8 +294,8 @@ }, { "cell_type": "code", - "execution_count": 169, - "id": "4c182c85", + "execution_count": 29, + "id": "bf6182a0", "metadata": {}, "outputs": [ { @@ -290,13 +304,13 @@ "['b', 'c']" ] }, - "execution_count": 169, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tree[\"a\"][\"neighbors\"]" + "tree_dict_name[\"a\"][\"neighbors\"]" ] }, { @@ -310,18 +324,17 @@ "source": [ "## Data structures (sets)\n", "\n", - "- The children are not ordered\n", - "- Children names are unique" + "- The children are unique and not ordered" ] }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 30, "id": "d996b53e", "metadata": {}, "outputs": [], "source": [ - "tree = {\n", + "tree_set = {\n", " \"a\": set([\"b\", \"c\"]),\n", " \"b\": set([\"d\", \"e\"]),\n", " \"c\": set([\"f\"]),\n", @@ -343,13 +356,13 @@ "## Data structures (lists of lists)\n", "\n", "- Each node is an entry in the list\n", - "- Childre are sub-lists" + "- Children are sub-lists" ] }, { "cell_type": "code", - "execution_count": 122, - "id": "1cfece72", + "execution_count": 31, + "id": "ed1ae4b2", "metadata": {}, "outputs": [], "source": [ @@ -376,17 +389,18 @@ "## Data structures (tuples)\n", "\n", "- Each node is the first tuple\n", - "- Children are additionnal tuply entries" + "- Children are additionnal tuply entries\n", + "- Warning: tuples are immutable (cannot be changed)" ] }, { "cell_type": "code", - "execution_count": 119, - "id": "0ed87f90", + "execution_count": 32, + "id": "36be872b", "metadata": {}, "outputs": [], "source": [ - "tree = (\"a\", [\n", + "tree_tuple = (\"a\", [\n", " (\"b\", []),\n", " (\"c\", [\n", " (\"d\", [\n", @@ -396,6 +410,27 @@ "])" ] }, + { + 
"cell_type": "code", + "execution_count": 33, + "id": "160d1a92", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'a'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tree_tuple[0] # cannot be changed" + ] + }, { "cell_type": "markdown", "id": "f30c5bc6", @@ -405,16 +440,27 @@ } }, "source": [ - "## Class object\n", + "## Data structure (class object)\n", + "\n", + "How to create the tree? How to retrieve all nodes? Both iterative and recursive ways.\n", "\n", - "- The object contains a value and an unrestricted list of children" + "```python\n", + "class Node:\n", + " def __init__(self, value, children = []):\n", + " self.value = value\n", + " self.children = children\n", + "```" ] }, { "cell_type": "code", - "execution_count": 176, + "execution_count": 34, "id": "7d12baee", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, "outputs": [], "source": [ "class Node:\n", @@ -440,8 +486,8 @@ }, { "cell_type": "code", - "execution_count": 177, - "id": "a1b3ac88", + "execution_count": 35, + "id": "4e21434d", "metadata": { "slideshow": { "slide_type": "subslide" @@ -459,13 +505,13 @@ " ]),\n", "])\n", "\n", - "# or using root.children" + "# or using \"root.children\"" ] }, { "cell_type": "code", - "execution_count": 178, - "id": "48cb2472", + "execution_count": 36, + "id": "3ac188f3", "metadata": {}, "outputs": [ { @@ -474,7 +520,7 @@ "['a', 'b', 'd', 'e', 'c', 'f']" ] }, - "execution_count": 178, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -485,8 +531,8 @@ }, { "cell_type": "code", - "execution_count": 175, - "id": "95477007", + "execution_count": 37, + "id": "bf8772d3", "metadata": {}, "outputs": [ { @@ -495,7 +541,7 @@ "['a', 'c', 'f', 'b', 'e', 'd']" ] }, - "execution_count": 175, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -506,7 +552,7 @@ }, { "cell_type": "markdown", - "id": 
"f5e99024", + "id": "f8aefc1e", "metadata": { "slideshow": { "slide_type": "subslide" @@ -515,49 +561,49 @@ "source": [ "# Weighted trees\n", "\n", - "> Trees with a quantity associated to the edges\n", + "> Trees with a quantity associated to the links or the nodes\n", "\n", - "- Since we have a tree a way to store weights is using nodes values\n", - "- Root note weight is $0$" + "- Useful to quantifie both nodes and links\n", + "- Storing those values require additionnal data structures" ] }, { "cell_type": "markdown", - "id": "c4544dce", + "id": "b66a3424", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - "## Data structures (dicts for edges)\n", + "## Data structures for weighted trees (dicts for edges)\n", "\n", - "- To encode values in edges we need to add an extra value" + "- We need to add an extra value to encode values in edges " ] }, { "cell_type": "code", - "execution_count": 134, - "id": "cc40dc55", + "execution_count": 38, + "id": "f26343c9", "metadata": {}, "outputs": [], "source": [ - "tree = {'a': [{'b': 0}, {'c': 0}],\n", + "tree_w_dict = {'a': [{'b': 0}, {'c': 0}],\n", " 'b': [{'d': 0}, {'e': 0}],\n", " 'c': [{'f': 0}],\n", " 'd': [],\n", " 'e': []\n", - " }\n" + " }" ] }, { "cell_type": "code", - "execution_count": 135, - "id": "69301406", + "execution_count": 39, + "id": "a50608c9", "metadata": {}, "outputs": [], "source": [ - "tree = {\n", + "tree_w_tuple = {\n", " 'a': [('b', 0), ('c', 0)],\n", " 'b': [('d', 0), ('e', 0)],\n", " 'c': [('f', 0)],\n", @@ -568,7 +614,7 @@ }, { "cell_type": "markdown", - "id": "44b1c278", + "id": "e5f7dfb7", "metadata": { "slideshow": { "slide_type": "subslide" @@ -580,8 +626,8 @@ }, { "cell_type": "code", - "execution_count": 136, - "id": "cad00333", + "execution_count": 40, + "id": "a81e7954", "metadata": {}, "outputs": [], "source": [ @@ -600,7 +646,7 @@ }, { "cell_type": "markdown", - "id": "d6e804ec", + "id": "50918649", "metadata": { "slideshow": { "slide_type": "subslide" @@ -609,13 
+655,13 @@ "source": [ "## Exercise: Calculate the total weight of a tree\n", "\n", - "Go through all the nodes.." + "_Tip: go through all the nodes and get the edges, then sum their weights._" ] }, { "cell_type": "code", - "execution_count": 137, - "id": "a3d02030", + "execution_count": 41, + "id": "35793e38", "metadata": { "slideshow": { "slide_type": "fragment" @@ -639,8 +685,8 @@ }, { "cell_type": "code", - "execution_count": 138, - "id": "ea86d253", + "execution_count": 42, + "id": "8d143474", "metadata": {}, "outputs": [ { @@ -649,23 +695,23 @@ "[(1, 2, 5), (1, 3, 7)]" ] }, - "execution_count": 138, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tree = Node_weight(1)\n", + "tree_w_oo = Node_weight(1)\n", "child1 = Node_weight(2, weight=5)\n", "child2 = Node_weight(3, weight=7)\n", - "tree.children = [child1, child2]\n", - "get_tree_edges(tree)" + "tree_w_oo.children = [child1, child2]\n", + "get_tree_edges(tree_w_oo)" ] }, { "cell_type": "code", - "execution_count": 139, - "id": "461303a6", + "execution_count": 43, + "id": "864d9a68", "metadata": {}, "outputs": [ { @@ -674,18 +720,18 @@ "12" ] }, - "execution_count": 139, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sum(tpl[2] for tpl in get_tree_edges(tree))" + "sum(tpl[2] for tpl in get_tree_edges(tree_w_oo))" ] }, { "cell_type": "markdown", - "id": "b5554daf", + "id": "cad8ffb6", "metadata": { "slideshow": { "slide_type": "subslide" @@ -699,8 +745,8 @@ }, { "cell_type": "code", - "execution_count": 142, - "id": "6118a8e3", + "execution_count": 44, + "id": "36b7cce1", "metadata": {}, "outputs": [], "source": [ @@ -713,8 +759,8 @@ }, { "cell_type": "code", - "execution_count": 143, - "id": "e9742977", + "execution_count": 45, + "id": "a3371c5a", "metadata": {}, "outputs": [ { @@ -723,33 +769,34 @@ "12" ] }, - "execution_count": 143, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ - "calculate_total_weight(tree)" + "calculate_total_weight(tree_w_oo)" ] }, { "cell_type": "markdown", - "id": "2949e143", + "id": "df3b5ca2", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - "# An Edge class for edges\n", + "## An Edge class for edges\n", "\n", - "- To consider edges as objects" + "- To consider edges as objects\n", + "- Can be used as a complement of the nodes (or without the nodes)" ] }, { "cell_type": "code", - "execution_count": 145, - "id": "6958cf7d", + "execution_count": 46, + "id": "f8094971", "metadata": {}, "outputs": [], "source": [ @@ -771,40 +818,531 @@ }, { "cell_type": "markdown", - "id": "7dc8b845", + "id": "659ca77e", + "metadata": {}, + "source": [ + "## Main trees properties\n", + "\n", + "- Hierarchical structure\n", + "- No cycle\n", + "- All nodes connected\n", + "\n", + "We will mostly use one of the two traversal methods (BFS and DFS) to achieve this.\n", + "\n", + "Also we will using the dictionnary-based data structure:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d45a5bb6", + "metadata": {}, + "outputs": [], + "source": [ + "tree = {\n", + " \"A\": [\"B\", \"C\"],\n", + " \"B\": [\"D\", \"E\"],\n", + " \"C\": [\"F\", \"G\"],\n", + " \"D\": [\"H\", \"I\"],\n", + " \"E\": [\"J\"],\n", + " \"F\": [],\n", + " \"G\": [],\n", + " \"H\": [],\n", + " \"I\": [],\n", + " \"J\": []\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2737d1a8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", + "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", + " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", + "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", + " -->\n", + "<!-- Pages: 1 -->\n", + "<svg width=\"350pt\" height=\"260pt\"\n", + " viewBox=\"0.00 0.00 350.00 260.00\" xmlns=\"http://www.w3.org/2000/svg\" 
xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", + "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 256)\">\n", + "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-256 346,-256 346,4 -4,4\"/>\n", + "<!-- A -->\n", + "<g id=\"node1\" class=\"node\">\n", + "<title>A</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"207\" cy=\"-234\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"207\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">A</text>\n", + "</g>\n", + "<!-- B -->\n", + "<g id=\"node2\" class=\"node\">\n", + "<title>B</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"171\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">B</text>\n", + "</g>\n", + "<!-- A->B -->\n", + "<g id=\"edge1\" class=\"edge\">\n", + "<title>A->B</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M198.65,-216.76C194.42,-208.55 189.19,-198.37 184.42,-189.09\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"187.68,-187.79 180,-180.49 181.46,-190.99 187.68,-187.79\"/>\n", + "</g>\n", + "<!-- C -->\n", + "<g id=\"node3\" class=\"node\">\n", + "<title>C</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"243\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"243\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">C</text>\n", + "</g>\n", + "<!-- A->C -->\n", + "<g id=\"edge2\" class=\"edge\">\n", + "<title>A->C</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M215.35,-216.76C219.58,-208.55 224.81,-198.37 229.58,-189.09\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"232.54,-190.99 234,-180.49 226.32,-187.79 232.54,-190.99\"/>\n", + "</g>\n", + "<!-- D -->\n", + "<g id=\"node4\" class=\"node\">\n", + "<title>D</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", + "<text 
text-anchor=\"middle\" x=\"99\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">D</text>\n", + "</g>\n", + "<!-- B->D -->\n", + "<g id=\"edge3\" class=\"edge\">\n", + "<title>B->D</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M156.08,-146.5C146.23,-136.92 133.14,-124.19 121.97,-113.34\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"124.59,-111 114.98,-106.54 119.71,-116.02 124.59,-111\"/>\n", + "</g>\n", + "<!-- E -->\n", + "<g id=\"node5\" class=\"node\">\n", + "<title>E</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"171\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">E</text>\n", + "</g>\n", + "<!-- B->E -->\n", + "<g id=\"edge4\" class=\"edge\">\n", + "<title>B->E</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M171,-143.7C171,-136.41 171,-127.73 171,-119.54\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"174.5,-119.62 171,-109.62 167.5,-119.62 174.5,-119.62\"/>\n", + "</g>\n", + "<!-- F -->\n", + "<g id=\"node6\" class=\"node\">\n", + "<title>F</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"243\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"243\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">F</text>\n", + "</g>\n", + "<!-- C->F -->\n", + "<g id=\"edge5\" class=\"edge\">\n", + "<title>C->F</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M243,-143.7C243,-136.41 243,-127.73 243,-119.54\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"246.5,-119.62 243,-109.62 239.5,-119.62 246.5,-119.62\"/>\n", + "</g>\n", + "<!-- G -->\n", + "<g id=\"node7\" class=\"node\">\n", + "<title>G</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"315\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"315\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">G</text>\n", + "</g>\n", + "<!-- C->G -->\n", + "<g 
id=\"edge6\" class=\"edge\">\n", + "<title>C->G</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M257.92,-146.5C267.77,-136.92 280.86,-124.19 292.03,-113.34\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"294.29,-116.02 299.02,-106.54 289.41,-111 294.29,-116.02\"/>\n", + "</g>\n", + "<!-- H -->\n", + "<g id=\"node8\" class=\"node\">\n", + "<title>H</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">H</text>\n", + "</g>\n", + "<!-- D->H -->\n", + "<g id=\"edge7\" class=\"edge\">\n", + "<title>D->H</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M84.08,-74.5C74.23,-64.92 61.14,-52.19 49.97,-41.34\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"52.59,-39 42.98,-34.54 47.71,-44.02 52.59,-39\"/>\n", + "</g>\n", + "<!-- I -->\n", + "<g id=\"node9\" class=\"node\">\n", + "<title>I</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">I</text>\n", + "</g>\n", + "<!-- D->I -->\n", + "<g id=\"edge8\" class=\"edge\">\n", + "<title>D->I</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M99,-71.7C99,-64.41 99,-55.73 99,-47.54\"/>\n", + "<polygon fill=\"black\" stroke=\"black\" points=\"102.5,-47.62 99,-37.62 95.5,-47.62 102.5,-47.62\"/>\n", + "</g>\n", + "<!-- J -->\n", + "<g id=\"node10\" class=\"node\">\n", + "<title>J</title>\n", + "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", + "<text text-anchor=\"middle\" x=\"171\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">J</text>\n", + "</g>\n", + "<!-- E->J -->\n", + "<g id=\"edge9\" class=\"edge\">\n", + "<title>E->J</title>\n", + "<path fill=\"none\" stroke=\"black\" d=\"M171,-71.7C171,-64.41 171,-55.73 171,-47.54\"/>\n", + 
"<polygon fill=\"black\" stroke=\"black\" points=\"174.5,-47.62 171,-37.62 167.5,-47.62 174.5,-47.62\"/>\n", + "</g>\n", + "</g>\n", + "</svg>\n" + ], + "text/plain": [ + "<graphviz.graphs.Digraph at 0x10a59bcd0>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "draw_tree_dict(tree)" + ] + }, + { + "cell_type": "markdown", + "id": "121006f2", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - "# Visualize a tree" + "## Generalized BFS (Breadth-First Search)" ] }, { "cell_type": "code", - "execution_count": 146, - "id": "9054d359", + "execution_count": 9, + "id": "ba62dbbc", "metadata": {}, "outputs": [], "source": [ - "from graphviz import Digraph\n", - "from IPython.display import display\n", + "def bfs(tree, start_node):\n", + " queue = [start_node]\n", + " result = []\n", "\n", - "def draw_tree(T):\n", - " dot = Digraph(format='png')\n", + " while queue:\n", + " node = queue.pop(0)\n", + " result.append(node)\n", + " children = tree.get(node, [])\n", "\n", - " def add_nodes_and_edges(tree, parent_name=None):\n", - " for parent, children in tree.items():\n", - " dot.node(parent, parent)\n", - " if parent_name:\n", - " dot.edge(parent_name, parent)\n", - " add_nodes_and_edges({child: [] for child in children}, parent)\n", + " for child in children:\n", + " if child is not None:\n", + " queue.append(child)\n", "\n", - " add_nodes_and_edges(T)\n", - " \n", - " display(dot)" + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5ec0e5f9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']\n" + ] + } + ], + "source": [ + "print(bfs(tree, \"A\"))" ] + }, + { + "cell_type": "markdown", + "id": "18c6b5e6", + "metadata": {}, + "source": [ + "## Generalized DFS (Depth-First Search)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0968d63f", + "metadata": {}, + 
"outputs": [], + "source": [ + "def dfs(tree, start_node):\n", + " stack = [start_node]\n", + " result = []\n", + "\n", + " while stack:\n", + " node = stack.pop()\n", + " result.append(node)\n", + " children = tree.get(node, [])\n", + "\n", + " for child in children:\n", + " if child is not None:\n", + " stack.append(child)\n", + "\n", + " return result\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c3f173c", + "metadata": {}, + "outputs": [], + "source": [ + "print(dfs(tree, \"A\"))" + ] + }, + { + "cell_type": "markdown", + "id": "6c9a2f9f", + "metadata": {}, + "source": [ + "## Tree property: are all nodes connected?\n", + "\n", + "Without having a first node and re-using the dfs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "405e887d", + "metadata": {}, + "outputs": [], + "source": [ + "def is_tree_connected(tree, start_node):\n", + " if not tree:\n", + " return True # An empty tree is considered connected.\n", + "\n", + " visited = set()\n", + " stack = []\n", + "\n", + " stack.append(start_node)\n", + "\n", + " while stack:\n", + " node = stack.pop()\n", + " if node not in visited:\n", + " visited.add(node)\n", + " stack.extend(tree.get(node, []))\n", + "\n", + " return len(visited) == len(tree)\n", + "\n", + "\n", + "is_tree_connected(tree, \"A\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5852f357", + "metadata": {}, + "outputs": [], + "source": [ + "dfs_check_connected(tree, \"A\")" + ] + }, + { + "cell_type": "markdown", + "id": "f76000b9", + "metadata": {}, + "source": [ + "## Tree property: does the tree have a cycle?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a1907c6", + "metadata": {}, + "outputs": [], + "source": [ + "def has_cycle_dfs(root):\n", + " def dfs(node, parent, visited):\n", + " if node in visited:\n", + " if parent is not None and parent != visited[node]:\n", + " return True\n", + " return False\n", + "\n", + " visited[node] = parent\n", + "\n", + " for child in node.children:\n", + " if dfs(child, node, visited):\n", + " return True\n", + "\n", + " return False\n", + "\n", + " visited = {}\n", + " return dfs(root, None, visited)" + ] + }, + { + "cell_type": "markdown", + "id": "ffb17bd1", + "metadata": {}, + "source": [ + "## What if we add an extra edge from \"F\" back to \"A\"?\n", + "\n", + "> tree[\"F\"] = [\"A\"]" + ] + }, + { + "cell_type": "markdown", + "id": "408393f0", + "metadata": {}, + "source": [ + "## Tree property: Check if the tree is an n-ary tree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91ef03ea", + "metadata": {}, + "outputs": [], + "source": [ + "def is_binary_tree(tree, node, n = 2, visited=None):\n", + " if visited is None:\n", + " visited = set()\n", + "\n", + " if node in visited:\n", + " return True\n", + "\n", + " visited.add(node)\n", + " children = tree.get(node, [])\n", + "\n", + " if len(children) > n:\n", + " return False\n", + "\n", + " for child in children:\n", + " if not is_binary_tree(tree, child, n, visited):\n", + " return False\n", + "\n", + " return True\n", + "\n", + "is_binary_tree(tree, \"A\", 2)" + ] + }, + { + "cell_type": "markdown", + "id": "e4279f90", + "metadata": {}, + "source": [ + "## Get all the edges of a tree" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "858814ab", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_edges(graph):\n", + " edges = []\n", + " for node, neighbors in graph.items():\n", + " for neighbor in neighbors:\n", + " edges.append((node, neighbor))\n", + " return edges" + ] + }, + { + "cell_type": "code", +
"execution_count": 12, + "id": "7ab4d047", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('A', 'B'),\n", + " ('A', 'C'),\n", + " ('B', 'D'),\n", + " ('B', 'E'),\n", + " ('C', 'F'),\n", + " ('C', 'G'),\n", + " ('D', 'H'),\n", + " ('D', 'I'),\n", + " ('E', 'J')]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generate_edges(tree)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3db5ca0d", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_edges_dfs(graph, start_node):\n", + " edges = []\n", + " stack = [start_node]\n", + " visited = []\n", + "\n", + " while stack:\n", + " node = stack.pop()\n", + " visited.append(node)\n", + " for neighbor in graph[node]:\n", + " if neighbor not in visited:\n", + " edges.append((node, neighbor))\n", + " stack.append(neighbor)\n", + "\n", + " return edges\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f3425b5c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{('A', 'B'),\n", + " ('A', 'C'),\n", + " ('B', 'D'),\n", + " ('B', 'E'),\n", + " ('C', 'F'),\n", + " ('C', 'G'),\n", + " ('D', 'H'),\n", + " ('D', 'I'),\n", + " ('E', 'J')}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generate_edges_dfs(tree, \"A\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7963ad24", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff0ba08e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/notebooks/11-graphs.ipynb b/notebooks/11-graphs.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fa910d207edce50f91032859227075510fc0747b --- /dev/null +++ b/notebooks/11-graphs.ipynb @@ -0,0 +1,2137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "09fc003e", + "metadata": 
{}, + "source": [ + "# UE5 Fundamentals of Algorithms\n", + "## Lecture 11: Graphs\n", + "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", + "#### Romain Vuillemot\n", + "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" + ] + }, + { + "cell_type": "markdown", + "id": "74743087", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ead453f3", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from graphviz import Digraph\n", + "from IPython.display import display\n", + "from utils import visualize_graph_nx, visualize_graph_w" + ] + }, + { + "cell_type": "markdown", + "id": "f3ebe7d2", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Outline\n", + "\n", + "- Definitions\n", + "- Data structures\n", + "- Properties\n", + "- Weighted graphs and spanning trees\n", + "- Shortest paths" + ] + }, + { + "cell_type": "markdown", + "id": "a5c1dca1", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "# Graphs\n", + "\n", + "\n", + "> A **graph** is an abstract data structure consisting of a set of vertices connected by edges.\n", + "\n", + "- Trees are a specific case of a graph (acyclic, connected graphs)\n", + "\n", + "Examples:\n", + "\n", + "- Messaging: the traveling salesman problem, postal routes\n", + "- Communication networks\n", + "- Traffic management: flow problems, minimum congestion paths, ...\n", + "- Air navigation (aircraft in sky corridors!)\n", + "- Closed transportation system (closed circuit): goods delivery, TSP (Traveling Salesman Problem).\n", + "- Printed circuit board wiring" + ] + }, + { + "cell_type": "markdown", + "id": "30f450b7", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Definition\n", + "\n", + "\n", + "Graph $G = (V, 
E)$ with:\n", + "\n", + "- $V$: set of nodes (vertices).\n", + "- $E \\subseteq V \\times V$: set of edges (links) or arcs (if oriented).\n", + "\n", + "Properties:\n", + "\n", + "- **Connected graph**: with a path between any pair of nodes.\n", + "- **Directed graphs**: where edges have a specific direction.\n", + "- **Weighted graphs**: numerical values associated with nodes or edges.\n", + "- **Strongly connected graphs**: directed graphs where there is a path from any node to any other node.\n", + "- **Bipartite**: vertices can be divided into two sets with no edges within a set.\n", + "- **Dense graph**: with a high edge-to-vertex ratio, often with $|E| = O(|V|^2)$.\n", + "- **Path**: a sequence of vertices in which each consecutive pair is connected by an edge.\n", + "- **Cycle**: a path that starts and ends at the same vertex.\n", + "- **Degree**: number of edges connected to a node." + ] + }, + { + "cell_type": "markdown", + "id": "5cc0eab1", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Data structures: dict\n", + "\n", + "- Using a dictionary with adjacency lists (as for trees, but a graph may contain cycles and non-connected nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "63e36dcf", + "metadata": {}, + "outputs": [], + "source": [ + "g = { \"a\" : [\"d\"],\n", + " \"b\" : [\"c\"],\n", + " \"c\" : [\"b\", \"c\", \"d\", \"e\"],\n", + " \"d\" : [\"a\", \"c\"],\n", + " \"e\" : [\"c\"], \n", + " \"f\" : [] \n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c0413aac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['a', 'b', 'c', 'd', 'e', 'f'])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g.keys() # nodes" + ] + }, + { + "cell_type": "markdown", + "id": "b5995983", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Data structures: dict" + ] + }, + { + "cell_type": "code", +
"execution_count": 4, + "id": "2ecde2a7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('a', 'd'),\n", + " ('b', 'c'),\n", + " ('c', 'b'),\n", + " ('c', 'c'),\n", + " ('c', 'd'),\n", + " ('c', 'e'),\n", + " ('d', 'a'),\n", + " ('d', 'c'),\n", + " ('e', 'c')]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def generate_edges(graph):\n", + " edges = []\n", + " for node, neighbors in graph.items():\n", + " for neighbor in neighbors:\n", + " edges.append((node, neighbor))\n", + " return edges\n", + "\n", + "generate_edges(g) # edges" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f7d6c3d1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('a', 'd'),\n", + " ('b', 'c'),\n", + " ('c', 'b'),\n", + " ('c', 'c'),\n", + " ('c', 'd'),\n", + " ('c', 'e'),\n", + " ('d', 'a'),\n", + " ('d', 'c'),\n", + " ('e', 'c')]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[(vertex, neighbor) for vertex, neighbors \n", + " in g.items() for neighbor in neighbors]" + ] + }, + { + "cell_type": "markdown", + "id": "b39566f5", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graphs: node-link representation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2e937dc7", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "visualize_graph_nx(g)" + ] + }, + { + "cell_type": "markdown", + "id": "e1996237", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS\n", + "\n", + "> Depth-First Search (DFS) starts exploring graphs at a source node, explores as far as possible along each branch before backtracking. 
\n", + "\n", + "- Similar to DFS on trees \n", + "- But needs to memorize visited nodes \n", + "\n", + "Steps:\n", + "\n", + "1. Put the source node into the stack.\n", + "2. Remove the node at the top of the stack to process it.\n", + "3. Put all unexplored neighbors into the stack (at the top).\n", + "4. If the stack is not empty, go back to step 2." + ] + }, + { + "cell_type": "markdown", + "id": "9a8d7a69", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS with external visited list (iterative)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7a863da7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a d c b e " + ] + } + ], + "source": [ + "def dfs(graph, start_node):\n", + " visited = set()\n", + " stack = [start_node]\n", + "\n", + " while stack:\n", + " node = stack.pop()\n", + " if node not in visited:\n", + " print(node, end=' ')\n", + " visited.add(node)\n", + " for neighbor in reversed(graph[node]):\n", + " if neighbor not in visited:\n", + " stack.append(neighbor)\n", + " \n", + "dfs(g, 'a') # start from node 'a'." + ] + }, + { + "cell_type": "markdown", + "id": "865e6d9e", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS with external visited list (recursive)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9fe25ca1", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a d c b e " + ] + } + ], + "source": [ + "def dfs_rec(graph, start_node, visited=None):\n", + "    \"\"\"Print the nodes reachable from start_node in depth-first order.\"\"\"\n", + "    # Create the set per top-level call: a default `set()` in the signature is\n", + "    # built once and shared, so a second call would find every node already visited.\n", + "    if visited is None:\n", + "        visited = set()\n", + "    if start_node not in visited:\n", + "        print(start_node, end=' ')\n", + "        visited.add(start_node)\n", + "        for neighbor in graph[start_node]:\n", + "            if neighbor not in visited:\n", + "                dfs_rec(graph, neighbor, visited)\n", + "\n", + "dfs_rec(g, 'a') # start from node 'a'."
+ ] + }, + { + "cell_type": "markdown", + "id": "8c628bee", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS with internal visited list (recursive)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "efeb1fa0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A B D E F C " + ] + } + ], + "source": [ + "def dfs(graph, start_node):\n", + " if start_node not in graph:\n", + " return\n", + "\n", + " print(start_node, end=' ')\n", + " graph[start_node]['visited'] = True\n", + "\n", + " for neighbor in graph[start_node]['neighbors']:\n", + " if not graph[neighbor]['visited']:\n", + " dfs(graph, neighbor)\n", + "\n", + "graph = {\n", + " 'A': {'neighbors': ['B', 'C'], 'visited': False},\n", + " 'B': {'neighbors': ['A', 'D', 'E'], 'visited': False},\n", + " 'C': {'neighbors': ['A', 'F'], 'visited': False},\n", + " 'D': {'neighbors': ['B'], 'visited': False},\n", + " 'E': {'neighbors': ['B', 'F'], 'visited': False},\n", + " 'F': {'neighbors': ['C', 'E'], 'visited': False}\n", + "}\n", + "\n", + "dfs(graph, 'A')" + ] + }, + { + "cell_type": "markdown", + "id": "2e6cee31", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## BFS\n", + "\n", + "> Breadth-First Search (BFS) starts exploring graphs at a source node, explores all of its neighbors at the current depth before moving on to nodes at the next depth level.\n", + "\n", + "- Similar to DFS, it also requires tracking visited nodes to avoid revisiting them.\n", + "\n", + "Steps for BFS:\n", + "\n", + "1. Put the source node into the queue.\n", + "2. Remove the node at the front of the queue to process it.\n", + "3. Explore all unvisited neighbors of the processed node and enqueue them at the back of the queue.\n", + "4. If the queue is not empty, go back to step 2." 
+ ] + }, + { + "cell_type": "markdown", + "id": "67afb125", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## BFS with external visited list (iterative)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "99dcb888", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A B C D E F " + ] + } + ], + "source": [ + "def bfs(graph, start_node):\n", + " visited = set()\n", + " queue = [start_node]\n", + "\n", + " while queue:\n", + " node = queue.pop(0)\n", + " if node not in visited:\n", + " print(node, end=' ')\n", + " visited.add(node)\n", + " for neighbor in graph.get(node, []):\n", + " if neighbor not in visited:\n", + " queue.append(neighbor)\n", + "\n", + "graph = {\n", + " 'A': ['B', 'C'],\n", + " 'B': ['A', 'D', 'E'],\n", + " 'C': ['A', 'F'],\n", + " 'D': ['B'],\n", + " 'E': ['B', 'F'],\n", + " 'F': ['C', 'E']\n", + "}\n", + "\n", + "bfs(graph, 'A')" + ] + }, + { + "cell_type": "markdown", + "id": "b48f3a6e", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## BFS with backtracking\n", + "\n", + "To memorize the path used to visit nodes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "071787a7", + "metadata": {}, + "outputs": [], + "source": [ + "def bfs_with_backtracking(graph, start_node, seeked_node):\n", + " visited = {node: False for node in graph}\n", + " path = {node: None for node in graph}\n", + " queue = [start_node]\n", + " found = False\n", + "\n", + " while queue:\n", + " current_node = queue.pop(0)\n", + " visited[current_node] = True\n", + "\n", + " for neighbor in graph[current_node]:\n", + " if not visited[neighbor]:\n", + " queue.append(neighbor)\n", + " visited[neighbor] = True\n", + " path[neighbor] = current_node\n", + "\n", + " if neighbor == seeked_node:\n", + " found = True\n", + " break\n", + "\n", + " if found:\n", + " break\n", + "\n", + " if not found:\n", + " return \"Path not found\"\n", + "\n", + " node = seeked_node\n", + " path_sequence = []\n", + " while node is not None:\n", + " path_sequence.insert(0, node)\n", + " node = path[node]\n", + "\n", + " return path_sequence" + ] + }, + { + "cell_type": "markdown", + "id": "7376c14d", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## BFS with backtracking\n", + "\n", + "Path re-construction from the BFS exploration:\n", + "\n", + "```python\n", + " if not found:\n", + " return \"Path not found\"\n", + "\n", + " node = seeked_node\n", + " path_sequence = []\n", + " while node is not None:\n", + " path_sequence.insert(0, node)\n", + " node = path[node]\n", + "\n", + " return path_sequence\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ebc07421", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Path from A to F: ['A', 'C', 'F']\n" + ] + } + ], + "source": [ + "graph = {\n", + " 'A': ['B', 'C'],\n", + " 'B': ['A', 'D', 'E'],\n", + " 'C': ['A', 'F'],\n", + " 'D': ['B'],\n", + " 'E': ['B', 'F'],\n", + " 'F': ['C', 'E']\n", + "}\n", 
+ "\n", + "start_node = 'A'\n", + "seeked_node = 'F'\n", + "path = bfs_with_backtracking(graph, start_node, seeked_node)\n", + "print(f\"Path from {start_node} to {seeked_node}: {path}\")" + ] + }, + { + "cell_type": "markdown", + "id": "12bd1417", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: path between two nodes?\n", + "\n", + "INPUT: a list of edges" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "4eae7ecd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is a path between 0 and 5: True\n" + ] + } + ], + "source": [ + "def has_path(edges, n, start, end):\n", + " voisins = [[] for i in range(n)]\n", + " for i, j in edges:\n", + " voisins[i].append(j)\n", + " voisins[j].append(i)\n", + "\n", + " stack = [start]\n", + " visited = set(stack)\n", + " while stack:\n", + " cur = stack.pop()\n", + " if cur == end:\n", + " return True\n", + " for v in voisins[cur]:\n", + " if v not in visited:\n", + " stack.append(v)\n", + " visited.add(v)\n", + " return False\n", + "\n", + "edges = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 5)]\n", + "num_nodes = 6 # number of unique nodes\n", + "start_node = 0; end_node = 5\n", + "result = has_path(edges, num_nodes, start_node, end_node)\n", + "print(f\"There is a path between {start_node} and {end_node}: {result}\")" + ] + }, + { + "cell_type": "markdown", + "id": "0f3d7abc", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Data structures: OOP\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5f5bbbc5", + "metadata": {}, + "outputs": [], + "source": [ + "class Graph:\n", + " def __init__(self):\n", + " self.graph = {}\n", + "\n", + " def add_vertex(self, vertex):\n", + " if vertex not in self.graph:\n", + " self.graph[vertex] = []\n", + "\n", + " def add_edge(self, vertex1, vertex2):\n", + " if vertex1 in self.graph and vertex2 in 
self.graph:\n", + " self.graph[vertex1].append(vertex2)\n", + " self.graph[vertex2].append(vertex1) \n", + "\n", + " def get_nodes(self):\n", + " return list(self.graph.keys())\n", + "\n", + " def get_edges(self):\n", + " edges = []\n", + " for vertex, neighbors in self.graph.items():\n", + " for neighbor in neighbors:\n", + " if (vertex, neighbor) not in edges and (neighbor, vertex) not in edges:\n", + " edges.append((vertex, neighbor))\n", + " return edges\n", + "\n", + " def __str__(self):\n", + " return str(self.graph)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b8565be4", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nodes: ['a', 'b', 'c', 'd', 'e', 'f']\n", + "Edges: [('a', 'd'), ('b', 'c'), ('c', 'c'), ('c', 'd'), ('c', 'e')]\n" + ] + } + ], + "source": [ + "g_obj = Graph()\n", + "\n", + "for vertex in [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"]:\n", + " g_obj.add_vertex(vertex)\n", + "\n", + "# Add edges\n", + "g_obj.add_edge(\"a\", \"d\")\n", + "g_obj.add_edge(\"b\", \"c\")\n", + "g_obj.add_edge(\"c\", \"b\")\n", + "g_obj.add_edge(\"c\", \"c\")\n", + "g_obj.add_edge(\"c\", \"d\")\n", + "g_obj.add_edge(\"c\", \"e\")\n", + "g_obj.add_edge(\"d\", \"a\")\n", + "g_obj.add_edge(\"d\", \"c\")\n", + "g_obj.add_edge(\"e\", \"c\")\n", + "\n", + "\n", + "print(\"Nodes:\", g_obj.get_nodes())\n", + "g_obj.get_edges() == generate_edges(g)\n", + "print(\"Edges:\", g_obj.get_edges())" + ] + }, + { + "cell_type": "markdown", + "id": "f33ed24b", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS using oop\n", + "\n", + "(Only explores a single connected component)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ac540bec", + "metadata": {}, + "outputs": [], + "source": [ + "def dfs(self, start_vertex, visited=None):\n", + "    \"\"\"Iterative DFS that prints every vertex reachable from start_vertex.\"\"\"\n", + "    # `visited` may be supplied by a caller that wants to share it across calls;\n", + "    # otherwise start from a fresh set (a default `set()` would persist between calls).\n", + "    if visited is None:\n", + "        visited = set()\n", + "    stack = [start_vertex]\n", + "\n", + "    while stack:\n",
+ "        vertex = stack.pop()\n", + "        if vertex not in visited:\n", + "            print(vertex, end=' ')\n", + "            visited.add(vertex)\n", + "            neighbors = self.graph[vertex]\n", + "            for neighbor in neighbors:\n", + "                if neighbor not in visited:\n", + "                    stack.append(neighbor)\n", + "\n", + "Graph.dfs = dfs # update the Graph class" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "aec8122f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a d c e b " + ] + } + ], + "source": [ + "g_obj.dfs(\"a\")" + ] + }, + { + "cell_type": "markdown", + "id": "c5360edc", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## DFS using oop\n", + "\n", + "Explores all the graph components" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "85b84e94", + "metadata": {}, + "outputs": [], + "source": [ + "def components(self):\n", + " visited = set()\n", + "\n", + " for vertex in self.graph:\n", + " if vertex not in visited:\n", + " self.dfs(vertex, visited)\n", + " print()\n", + "Graph.components = components # update the Graph class" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "643b438b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a d c e b \n", + "f \n" + ] + } + ], + "source": [ + "g_obj.components()" + ] + }, + { + "cell_type": "markdown", + "id": "09e71c1e", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: can a graph be n-colored?\n", + "\n", + "Two adjacent vertices (connected by an edge) cannot have the same color when properly colored. Example with $n = 2$ (i.e. can a graph be colored with 2 colors)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0395ed9d", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "class Node:\n", + "    \"\"\"Graph node holding a value `v` (here: a color) and a neighbor list `n`.\"\"\"\n", + "    def __init__(self, v = None, n = None):\n", + "        self.v = v\n", + "        # Fresh list per node: a `[]` default in the signature would be one\n", + "        # list shared by every Node created without explicit neighbors.\n", + "        self.n = n if n is not None else []\n", + "        self.visited = False\n", + "\n", + "def two_color(r):\n", + " \n", + " stack = [r]\n", + " \n", + " while len(stack) > 0:\n", + " c = stack.pop(0)\n", + " for n in c.n:\n", + " if(c.v == n.v): # neighbours have same color\n", + " return False\n", + " if not n.visited:\n", + " stack.append(n)\n", + " n.visited = True \n", + "\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "8fda865c", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "n1 = Node(\"gray\")\n", + "n2 = Node(\"black\")\n", + "n3 = Node(\"gray\")\n", + "n4 = Node(\"gray\")\n", + "n5 = Node(\"black\")\n", + "n6 = Node(\"gray\")\n", + "\n", + "n1.n = [n2]\n", + "n2.n = [n1, n3, n4]\n", + "n3.n = [n2, n5]\n", + "n4.n = [n2, n5]\n", + "n5.n = [n3, n4, n6]\n", + "n6.n = [n5]\n", + "\n", + "print(two_color(n1)) " + ] + }, + { + "cell_type": "markdown", + "id": "d6584072", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Data structure: Adjacency matrix\n", + "\n", + "- Square: it has the same number of rows and columns.\n", + "- A 1 in a cell $m_{ij}$ indicates a link between nodes `i` and `j`.\n", + "- A 1 on the diagonal would indicate a loop.\n", + "- It is symmetric: $m_{ij} = m_{ji}$ for an undirected graph.\n", + "- For valued graphs, cells contain values (instead of `1`).\n" + ] + }, + { + "cell_type": "markdown", + "id": "95501f5c", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Adjacency matrix (example)\n", + "\n", + "Given the graph `G`, what is its 
corresponding adjacency matrix?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "e045bef6", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 0, 0, 1, 0, 0]\n", + "[0, 0, 1, 0, 0, 0]\n", + "[0, 1, 1, 1, 1, 0]\n", + "[1, 0, 1, 0, 0, 0]\n", + "[0, 0, 1, 0, 0, 0]\n", + "[0, 0, 0, 0, 0, 0]\n" + ] + } + ], + "source": [ + "nodes = sorted(g.keys())\n", + "num_nodes = len(nodes)\n", + "adj_matrix = [[0] * num_nodes for _ in range(num_nodes)]\n", + "\n", + "# Rows and columns follow the sorted node order; a 1 marks an edge nodes[i] -> nodes[j].\n", + "for i, node in enumerate(nodes):\n", + "    connected_nodes = g[node]\n", + "    for connected_node in connected_nodes:\n", + "        j = nodes.index(connected_node)\n", + "        adj_matrix[i][j] = 1\n", + "\n", + "for row in adj_matrix:\n", + "    print(row)" + ] + }, + { + "cell_type": "markdown", + "id": "99ade3d5", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Adjacency matrix (OOP)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "c1419547", + "metadata": {}, + "outputs": [], + "source": [ + "class GraphAdj:\n", + "\n", + " def __init__(self, n):\n", + " self.__n = n\n", + " self.__g = [[0 for _ in range(n)] for _ in range(n)]\n", + "\n", + " for i in range(0, self.__n):\n", + " for j in range(0, self.__n):\n", + " self.__g[i][j] = 0\n", + "\n", + "\n", + " def addEdge(self, x, y):\n", + " if (x < 0) or (x >= self.__n):\n", + " print(\"Vertex {} does not exist!\".format(x))\n", + " if (y < 0) or (y >= self.__n):\n", + " print(\"Vertex {} does not exist!\".format(y))\n", + "\n", + " if(x == y):\n", + " print(\"Same Vertex!\")\n", + " else:\n", + " self.__g[y][x] = 1\n", + " self.__g[x][y] = 1\n", + "\n", + " def displayAdjacencyMatrix(self):\n", + " for i in range(0, self.__n):\n", + " print()\n", + " for j in range(0, self.__n):\n", + " print(\"\", self.__g[i][j], end = \"\")\n", + "\n", + " def removeEdge(self, x, y):\n", + " if (x < 0) or (x 
>= self.__n):\n", + " print(\"Vertex {} does not exist!\".format(x))\n", + " if (y < 0) or (y >= self.__n):\n", + " print(\"Vertex {} does not exist!\".format(y))\n", + " if(x == y):\n", + " print(\"Same Vertex!\")\n", + " else:\n", + " self.__g[y][x] = 0\n", + " self.__g[x][y] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "079f999c", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " 0 1 1 1 1 0\n", + " 1 0 0 1 0 0\n", + " 1 0 0 1 1 1\n", + " 1 1 1 0 0 1\n", + " 1 0 1 0 0 0\n", + " 0 0 1 1 0 0" + ] + } + ], + "source": [ + "obj = GraphAdj(6)\n", + "\n", + "obj.addEdge(0, 1)\n", + "obj.addEdge(0, 2)\n", + "obj.addEdge(0, 3)\n", + "obj.addEdge(0, 4)\n", + "obj.addEdge(1, 3)\n", + "obj.addEdge(2, 3)\n", + "obj.addEdge(2, 4)\n", + "obj.addEdge(2, 5)\n", + "obj.addEdge(3, 5)\n", + "\n", + "obj.displayAdjacencyMatrix()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ecf8ed5a", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " 0 1 1 1 1 0\n", + " 1 0 0 1 0 0\n", + " 1 0 0 0 1 1\n", + " 1 1 0 0 0 1\n", + " 1 0 1 0 0 0\n", + " 0 0 1 1 0 0" + ] + } + ], + "source": [ + "obj.removeEdge(2, 3);\n", + "obj.displayAdjacencyMatrix();" + ] + }, + { + "cell_type": "markdown", + "id": "d6ec3132", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: is a graph connected? 
(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "1542423b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The graph is connected: False\n" + ] + } + ], + "source": [ + "def is_connected(graph):\n", + " n = len(graph)\n", + " visited = [False] * n\n", + " stack = [0]\n", + " while stack:\n", + " node = stack.pop()\n", + " if not visited[node]:\n", + " visited[node] = True\n", + " for i in range(n):\n", + " if graph[node][i] == 1 and not visited[i]:\n", + " stack.append(i)\n", + " return visited.count(True) == len(graph)\n", + "\n", + "g_m = [\n", + " [0, 0, 0, 0, 0],\n", + " [0, 0, 1, 0, 0],\n", + " [0, 1, 0, 1, 0],\n", + " [0, 0, 1, 0, 1],\n", + " [0, 0, 0, 1, 0]\n", + "]\n", + "\n", + "# We do a DFS\n", + "is_graph_connected = is_connected(g_m)\n", + "print(f\"The graph is connected: {is_graph_connected}\")" + ] + }, + { + "cell_type": "markdown", + "id": "99e73122", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: how many connected components? 
(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "a4809d88", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "def dfs(adj_matrix, node, visited):\n", + " visited[node] = True\n", + " for neighbor, connected in enumerate(adj_matrix[node]):\n", + " if connected and not visited[neighbor]:\n", + " dfs(adj_matrix, neighbor, visited)\n", + "\n", + "def count_connected_components(adj_matrix):\n", + " num_nodes = len(adj_matrix)\n", + " visited = [False] * num_nodes\n", + " components = 0\n", + "\n", + " for i in range(num_nodes):\n", + " if not visited[i]:\n", + " dfs(adj_matrix, i, visited)\n", + " components += 1\n", + "\n", + " return components" + ] + }, + { + "cell_type": "markdown", + "id": "a886fa9c", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: is there a self-connected node? (matrix)\n", + "\n", + "I.e is there for instance a node A $\\rightarrow$ A" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "81106cd6", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "def has_ones_in_diagonal(matrix):\n", + " for i in range(len(matrix)):\n", + " if matrix[i][i] == 1:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "70044448", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We check if there is a `1` in the diagonal\n", + "has_ones_in_diagonal(g_m)" + ] + }, + { + "cell_type": "markdown", + "id": "083bc1ef", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: is a graph oriented? 
(matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "9e0860f6", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "# check if the matrix is equal to its transpose.\n", + "def is_symmetric(matrix):\n", + " rows = len(matrix)\n", + " cols = len(matrix[0])\n", + "\n", + " for i in range(rows):\n", + " for j in range(cols):\n", + " if matrix[i][j] != matrix[j][i]:\n", + " return False\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "0f6f774b", + "metadata": {}, + "outputs": [], + "source": [ + "g_empty = []\n", + "n = 5\n", + "for i in range(n):\n", + " row = []\n", + " for j in range(n):\n", + " row.append(0)\n", + " g_empty.append(row)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "0ae92b34", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 0, 0, 0, 0]\n", + "[0, 0, 0, 0, 0]\n", + "[0, 0, 0, 0, 0]\n", + "[0, 0, 0, 0, 0]\n", + "[0, 0, 0, 0, 0]\n" + ] + } + ], + "source": [ + "for r in g_empty:\n", + " print(r, end=\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "3928df4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_symmetric(g_empty)" + ] + }, + { + "cell_type": "markdown", + "id": "2be90e37", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graph property: is a graph connected? (dict)\n", + "\n", + "We check if the dfs equals the number of nodes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "93eb41a0", + "metadata": {}, + "outputs": [], + "source": [ + "def is_connected(graph):\n", + " if not graph:\n", + " return True\n", + "\n", + " visited = set()\n", + " start_node = list(graph.keys())[0]\n", + "\n", + " def dfs(node):\n", + " visited.add(node)\n", + " for neighbor in graph[node]:\n", + " if neighbor not in visited:\n", + " dfs(neighbor)\n", + "\n", + " dfs(start_node)\n", + "\n", + " return len(visited) == len(graph)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "b53901d2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "is_connected(g)" + ] + }, + { + "cell_type": "markdown", + "id": "3c584a2a", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Weighted graphs\n", + "\n", + "Graph with numerical values associated with nodes or edges.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "e6aae369", + "metadata": {}, + "outputs": [], + "source": [ + "graph_w = {\n", + " \"a\": [(\"d\", 1)],\n", + " \"b\": [(\"c\", 3)],\n", + " \"c\": [(\"a\", 1), (\"b\", 3), (\"d\", 1), (\"e\", 1)],\n", + " \"d\": [(\"a\", 1), (\"c\", 1)],\n", + " \"e\": [(\"c\", 1)],\n", + " \"f\": []\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "e6ffdf83", + "metadata": {}, + "outputs": [], + "source": [ + "def greedy_heuristic_shortest_path(graph, start, end):\n", + " current_node = start\n", + " visited = set()\n", + "\n", + " while current_node != end:\n", + " visited.add(current_node)\n", + " min_weight = float('inf')\n", + " next_node = None\n", + "\n", + " # Find the neighboring unvisited node with the smallest weight\n", + " for neighbor, weight in graph[current_node]:\n", + " if neighbor not in visited and weight < min_weight:\n", + " min_weight = 
weight\n", + " next_node = neighbor\n", + "\n", + " if next_node is None:\n", + " return float('inf') # No path found\n", + "\n", + " current_node = next_node\n", + "\n", + " return 0 # Path found from start to end" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "93ae7fa1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "greedy_heuristic_shortest_path(graph_w, \"a\", \"e\")" + ] + }, + { + "cell_type": "markdown", + "id": "729aa7e8", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Weighted graphs" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c4f13ab0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "visualize_graph_w(graph_w)" + ] + }, + { + "cell_type": "markdown", + "id": "0eb464fd", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Spanning Trees\n", + "\n", + "> A **Minimum Spanning Tree (MST)** of a graph is a subset of edges that connects all vertices while minimizing the total sum of the edge values.\n", + "\n", + "- If a graph has $N$ vertices, its MST (Minimum Spanning Tree) will have $N-1$ edges.\n", + "\n", + "- A graph can have multiple spanning trees, but the MST is the one with the lowest weight.\n", + "\n", + "- A tree has only one spanning tree: itself.\n", + "\n", + "\n", + "Question: What is the minimum spanning tree of this graph?" 
+ ] + }, + { + "cell_type": "markdown", + "id": "de9abb12", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/spanning-tree.png\" style=\"height:10cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "3a8c8f2a", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/spanning-tree-sol-1.png\" style=\"height: 10cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "95fa19f1", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/spanning-tree-sol-2.png\" style=\"height: 10cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "1e7c93c1", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Weighted Graph MST finding: Prim's Algorithm\n", + "\n", + "\n", + "1. Start with an initial tree reduced to a single vertex of the graph.\n", + "\n", + "2. At each iteration, expand the tree by adding the available free vertex with the smallest possible weight.\n", + "\n", + "3. Stop when the tree becomes spanning.\n", + "\n", + "\n", + "Programming Strategy?" 
+ ] + }, + { + "cell_type": "markdown", + "id": "b6aeca7f", + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "source": [ + "Greedy" + ] + }, + { + "cell_type": "markdown", + "id": "02e04cbb", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/prim-kruskal.png\" style=\"height:15cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "1c710862", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Weighted Graph MST finding: Prim's Algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "30da5bfd", + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "from heapq import heapify, heappop, heappush\n", + "\n", + "def prim(graph):\n", + " mst = []\n", + " start_vertex = list(graph.keys())[0]\n", + " priority_queue = [(0, start_vertex)]\n", + " visited = set()\n", + " \n", + " while priority_queue:\n", + " weight, current_vertex = heappop(priority_queue)\n", + " if current_vertex not in visited:\n", + " mst.append((current_vertex, weight))\n", + " visited.add(current_vertex)\n", + " for neighbor, edge_weight in graph[current_vertex]:\n", + " if neighbor not in visited:\n", + " heappush(priority_queue, (edge_weight, neighbor))\n", + " return mst" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "bc4f1d67", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('a', 0), ('d', 1), ('c', 1), ('b', 1), ('e', 1)]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "prim(graph_w)" + ] + }, + { + "cell_type": "markdown", + "id": "3589712a", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graphs: shortest paths\n", + "\n", + "What is the shortest path from $s \\rightarrow z$? 
\n", + "\n", + "<img src=\"figures/bellman-solo.png\" style=\"height:5cm;\">\n", + "\n", + "Approaches:\n", + "\n", + "1. **BFS with local minimum (greedy):**\n", + "2. **BFS with global minimum (dynamic programming):**\n", + "3. Other?" + ] + }, + { + "cell_type": "markdown", + "id": "b43e0f7b", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graphs: shortest paths (BFS)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "bbd17edb", + "metadata": {}, + "outputs": [], + "source": [ + "graph_s = {\n", + " \"s\": [(\"t\", 6), (\"y\", 7)],\n", + " \"t\": [(\"x\", 5), (\"y\", 8), (\"z\", -4)],\n", + " \"y\": [(\"x\", -3), (\"z\", 9)],\n", + " \"x\": [(\"t\", -2)],\n", + " \"z\": [(\"s\", 2), (\"x\", 7)]\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "2c7eaade", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "def bfs_path(graph, start, end):\n", + " if start == end:\n", + " return [start]\n", + "\n", + " visited = set()\n", + " queue = [(start, [], 0)]\n", + "\n", + " while queue:\n", + " queue.sort(key=lambda x: x[2])\n", + " current, path, cost = queue.pop(0)\n", + " visited.add(current)\n", + "\n", + " for neighbor, edge_cost in graph[current]:\n", + " if neighbor not in visited:\n", + " if neighbor == end:\n", + " return path + [current, neighbor]\n", + " queue.append((neighbor, path + [current], cost + edge_cost))\n", + "\n", + " return None\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "70e22a2d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Path from s to z : ['s', 't', 'z']\n" + ] + } + ], + "source": [ + "start_node = 's'\n", + "end_node = 'z'\n", + "\n", + "path = bfs_path(graph_s, start_node, end_node)\n", + "if path:\n", + " print(\"Path from\", start_node, \"to\", end_node, \":\", path)\n", + "else:\n", + " print(\"No path found 
from\", start_node, \"to\", end_node)" + ] + }, + { + "cell_type": "markdown", + "id": "73bd4b61", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Graphs: shortest paths (Bellman-Ford)\n", + "\n", + "- **Objective**: Determine the shortest paths from a single source to all other nodes in the graph.\n", + " \n", + "- **Initialization**: Assign an initial distance value of 0 to the source node and infinity to all other nodes.\n", + " \n", + "- **Iterative Relaxation of Edges**:\n", + "\n", + " - Perform $|V| - 1$ iterations ($V$ being the number of vertices).\n", + " \n", + " - For each edge $(u, v)$, update the distance if the distance to node $v$ through node $u$ is shorter than the current distance to $v$.\n", + " \n", + "- **Detection of Negative Cycles**:\n", + "\n", + " - After the $|V| - 1$ iterations, check for negative cycles by iterating through all edges.\n", + " \n", + " - If a shorter path is found, a negative cycle exists.\n" + ] + }, + { + "cell_type": "markdown", + "id": "185e01a2", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "<img src=\"figures/bellman-full.png\" style=\"height:10cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "45225f32", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/bellman-algo.png\" style=\"height:10cm;\">" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "c347393e", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "def bellman_ford(graph, src):\n", + " dist = {node: float(\"inf\") for node in graph}\n", + " dist[src] = 0\n", + "\n", + " for _ in range(len(graph) - 1):\n", + " for u in graph:\n", + " for v, w in graph[u]:\n", + " if dist[u] != float(\"inf\") and dist[u] + w < dist[v]:\n", + " dist[v] = dist[u] + w\n", + "\n", + " for u in graph:\n", + " for v, w in graph[u]:\n", + " if dist[u] != float(\"inf\") 
and dist[u] + w < dist[v]:\n", + " print(\"Le graphe contient des cycles négatifs\")\n", + " return\n", + "\n", + " return dist" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "64704f08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'s': 0, 't': 2, 'y': 7, 'x': 4, 'z': -2}" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bellman_ford(graph_s, 's')" + ] + }, + { + "cell_type": "markdown", + "id": "94475748", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Dijkstra's Algorithm \n", + "\n", + "- **Objective:** Determine the shortest paths between sources $S$ and nodes in the graph accessible from $S$.\n", + "\n", + "- Incremental and greedy construction of a set of visited nodes $E$ accessible from initial vertex $S$.\n", + "\n", + "- **Initialization:** $E_{0}$ is an empty list and $G = \\{S\\}$.\n", + "\n", + "- Move to the next step:\n", + "\n", + " - $E_{i+1} = E_{i} \\cup \\{ $ node from $G$ outside of $E_{i}$ closest to $S$ by following a path that only passes through nodes in $E_{i} \\}$.\n", + "\n", + "- The vertices entering $E$ in ascending order of distance to $S$.\n", + "\n", + "\n", + "Warning: assumes costs $> 0$.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "949dbfe2", + "metadata": {}, + "outputs": [], + "source": [ + "graph_d = {\n", + " \"s\": [(\"t\", 6), (\"y\", 4)],\n", + " \"t\": [(\"x\", 3), (\"y\", 2)],\n", + " \"y\": [(\"t\", 1), (\"x\", 9), (\"z\", 3)],\n", + " \"x\": [(\"z\", 4)],\n", + " \"z\": [(\"s\", 7), (\"x\", 5)]\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "ceca3bde", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + " <img src=\"figures/dijkstra-algo.png\" style=\"height:10cm;\">" + ] + }, + { + "cell_type": "markdown", + "id": "6d612ac9", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + 
"source": [ + " <img src=\"figures/dijkstra-full.png\" style=\"height:10cm;\">" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "2770567c", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [], + "source": [ + "def dijkstra(graph, initial):\n", + " visited = {initial: 0}\n", + " path = {}\n", + " nodes = set(graph.keys())\n", + " while nodes:\n", + " min_node = None\n", + " for node in nodes:\n", + " if node in visited:\n", + " if min_node is None:\n", + " min_node = node\n", + " elif visited[node] < visited[min_node]:\n", + " min_node = node\n", + "\n", + " if min_node is None:\n", + " break\n", + "\n", + " nodes.remove(min_node)\n", + " current_weight = visited[min_node]\n", + " for edge, weight in graph[min_node]:\n", + " weight = current_weight + weight\n", + " if edge not in visited or weight < visited[edge]:\n", + " visited[edge] = weight\n", + " path[edge] = min_node\n", + "\n", + " return visited, path" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "7b04d1dc", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "({'s': 0, 't': 5, 'y': 4, 'x': 8, 'z': 7},\n", + " {'t': 'y', 'y': 's', 'x': 't', 'z': 'y'})" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dijkstra(graph_d, 's')" + ] + }, + { + "cell_type": "markdown", + "id": "82240937", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "## Summary of shortest path finding\n", + "\n", + "- Principle of minimizing a cost (optimal sub-problem)\n", + "- Principle of algorithms (Bellman-Ford, Dijkstra, Floyd-Warshall) is to overestimate the weights of the vertices and adjust the cost using a *relaxation* method.\n", + "- The Bellman-Ford algorithm is similar to Dijkstra's. 
Both rely on the same notion of relaxation: $d(j) \\rightarrow \\min(d(j), d(x) + G(x, j))$.\n", + "- Dijkstra does not tolerate negative costs and uses a priority queue to process vertices in increasing order of distance, so that each edge is relaxed only once.\n", + "- Bellman-Ford processes edges in an arbitrary order. It tolerates negative costs. For these reasons, up to $|V| - 1$ passes over all edges may be necessary.\n", + "- Dijkstra on a graph whose edges all have cost $1$ behaves like breadth-first search (the priority queue becomes a plain FIFO queue).\n", + "\n" + ] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/figures/bellman-algo.png b/notebooks/figures/bellman-algo.png new file mode 100755 index 0000000000000000000000000000000000000000..762b830dab15c89cfbee4752b34bf3bb8e7f5d98 Binary files /dev/null and b/notebooks/figures/bellman-algo.png differ diff --git a/notebooks/figures/bellman-full.png b/notebooks/figures/bellman-full.png new file mode 100755 index 0000000000000000000000000000000000000000..6497fef7c849a6a8a0f36c87297f82242000cb27 Binary files /dev/null and b/notebooks/figures/bellman-full.png differ diff --git a/notebooks/figures/bellman-solo.png b/notebooks/figures/bellman-solo.png new file mode 100755 index 0000000000000000000000000000000000000000..70e9cc4ccaecba740b2e6968d5e6758f070fcd28 Binary files /dev/null and b/notebooks/figures/bellman-solo.png differ diff --git a/notebooks/figures/dijkstra-algo.png b/notebooks/figures/dijkstra-algo.png new file mode 100755 index 0000000000000000000000000000000000000000..cf8fd81a902d4ec2a9ad3ba0ced4b1354353bab1 Binary files /dev/null and
b/notebooks/figures/dijkstra-algo.png differ diff --git a/notebooks/figures/dijkstra-full.png b/notebooks/figures/dijkstra-full.png new file mode 100755 index 0000000000000000000000000000000000000000..79e2a8bad7f8c121bec9151f1d13a0ac7c8f4acd Binary files /dev/null and b/notebooks/figures/dijkstra-full.png differ diff --git a/notebooks/figures/placeholder-h.png b/notebooks/figures/placeholder-h.png new file mode 100644 index 0000000000000000000000000000000000000000..545b93294eb8eb6121601d98c4ecaa52d0850459 Binary files /dev/null and b/notebooks/figures/placeholder-h.png differ diff --git a/notebooks/figures/placeholder.png b/notebooks/figures/placeholder.png index 2e3fd26830cccc32005da06088d7e79444aad707..778fa225d7776bb94391207309bf96a1f28281bc 100644 Binary files a/notebooks/figures/placeholder.png and b/notebooks/figures/placeholder.png differ diff --git a/notebooks/figures/prim-kruskal.png b/notebooks/figures/prim-kruskal.png new file mode 100755 index 0000000000000000000000000000000000000000..04d50314930a8602f165cc219687d0b2cec54afb Binary files /dev/null and b/notebooks/figures/prim-kruskal.png differ diff --git a/notebooks/figures/spanning-tree-sol-1.png b/notebooks/figures/spanning-tree-sol-1.png new file mode 100755 index 0000000000000000000000000000000000000000..2e9a6899c50ef20aee7b54cf6524add4c6f22f46 Binary files /dev/null and b/notebooks/figures/spanning-tree-sol-1.png differ diff --git a/notebooks/figures/spanning-tree-sol-2.png b/notebooks/figures/spanning-tree-sol-2.png new file mode 100755 index 0000000000000000000000000000000000000000..a35789a5a758a38e036c67193bcc6cabe6051feb Binary files /dev/null and b/notebooks/figures/spanning-tree-sol-2.png differ diff --git a/notebooks/figures/spanning-tree.png b/notebooks/figures/spanning-tree.png new file mode 100755 index 0000000000000000000000000000000000000000..576ca95d422767d94e049dfdb0923dfc41301860 Binary files /dev/null and b/notebooks/figures/spanning-tree.png differ