diff --git a/lectures-notebooks/03-lists-search-sort.ipynb b/lectures-notebooks/03-lists-search-sort.ipynb deleted file mode 100644 index cc8c8cd05f260bebfab488daa464b7188909ab52..0000000000000000000000000000000000000000 --- a/lectures-notebooks/03-lists-search-sort.ipynb +++ /dev/null @@ -1,1105 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "bec049ad", - "metadata": { - "nbgrader": { - "grade": false, - "grade_id": "cell-6892cebeacb58715", - "locked": true, - "schema_version": 3, - "solution": false, - "task": false - }, - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## Lecture 3: Lists, search, sort\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>\n" - ] - }, - { - "cell_type": "markdown", - "id": "fbbc2f0d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Outline\n", - "- Definition and examples of lists\n", - "- Manipulate\n", - "- Search\n", - "- Sort\n", - "- Iterate\n", - "- Filter/Map" - ] - }, - { - "cell_type": "markdown", - "id": "ab21085a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## What is a list?\n", - "\n", - "> A **list** is a linear data structure \n", - "\n", - "- A sequence of elements\n", - "- Each element has a position (index)\n", - "- The order of the elements is important\n", - "- The elements can be of any type\n", - "\n", - "Examples of lists:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4fcb6bdb", - "metadata": {}, - "outputs": [], - "source": [ - "numbers_list = [1, 2, 3, 4, 5]\n", - "strings_list = [\"apple\", \"banana\", \"cherry\", \"date\"]\n", - "mixed_list = [1, \"apple\", 3.14, True]\n", - "nested_list = [[1, 2, 3], [\"a\", \"b\", \"c\"], [True, False]]" - ] - }, - { - "cell_type": 
"markdown", - "id": "bb445f5c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### List operations\n", - "\n", - "- **Access** an element at a given position\n", - "- **Insert** an element at a given position\n", - "- **Remove** an element at a given position\n", - "- **Search** an element\n", - "- **Sort** the list\n", - "- **Reverse** the list" - ] - }, - { - "cell_type": "markdown", - "id": "cc05f88e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Access\n", - "\n", - "> Return a value" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "33da6137", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n", - "2\n", - "3\n" - ] - } - ], - "source": [ - "L = [1, 2, 3]\n", - "\n", - "for val in L:\n", - " print(val)" - ] - }, - { - "cell_type": "markdown", - "id": "990e937d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Example: is a list ordered?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "05a3d562", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n", - "False\n" - ] - } - ], - "source": [ - "def estordonnee(liste):\n", - "\n", - " for i in range(len(liste) - 1):\n", - " if liste[i + 1] < liste[i]:\n", - " return False\n", - " return True\n", - "\n", - "print(estordonnee([1,2,3,4]))\n", - "print(estordonnee([1,2,3,4,1]))" - ] - }, - { - "cell_type": "markdown", - "id": "3e822b32", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Insert\n", - "\n", - "> Add elements in the list (regardless the index)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5aad3402", - "metadata": {}, - "outputs": [], - "source": [ - "from time import time\n", - "\n", - "def compute_average(n):\n", - " data = []\n", - " start = time()\n", - " for k in range(n):\n", - " data.append(None)\n", - " end = time()\n", - " return (end - start) / n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "37c3b4a7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.5497207641601563e-07" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "compute_average(20)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9c4be228", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1, 2, 99, 100, 3, 4, 5]\n" - ] - } - ], - "source": [ - "my_list = [1, 2, 3, 4, 5]\n", - "insert_elements = [99, 100]\n", - "my_list = my_list[:2] + insert_elements + my_list[2:]\n", - "print(my_list)" - ] - }, - { - "cell_type": "markdown", - "id": "5d95ee75", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Search\n", - "\n", - "> Given a list of elements, find the position of a given element\n" - 
] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8a5dd42f", - "metadata": { - "nbgrader": { - "grade": false, - "grade_id": "cell-387c098b188d4707", - "locked": false, - "schema_version": 3, - "solution": true, - "task": false - } - }, - "outputs": [], - "source": [ - "def search_element_in_list(element, list):\n", - " for i in list:\n", - " if i == element:\n", - " return True\n", - " return False" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "a3e4ed1f", - "metadata": { - "nbgrader": { - "grade": true, - "grade_id": "cell-d5f44386312e81c0", - "locked": true, - "points": 1, - "schema_version": 3, - "solution": false, - "task": false - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3 found at index 2.\n" - ] - } - ], - "source": [ - "L = [1, 2, 3, 4, 5]\n", - "element_to_find = 3\n", - "\n", - "try:\n", - " index = L.index(element_to_find)\n", - " print(f\"{element_to_find} found at index {index}.\")\n", - "except ValueError:\n", - " print(f\"{element_to_find} is not in the list.\")" - ] - }, - { - "cell_type": "markdown", - "id": "52399db0", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Example: Binary search (pseudo code)\n", - "\n", - "- **Input**: A sorted list (array) and a target value to find.\n", - "- **Initialization**:\n", - " - Set a pointer `left` to the beginning of the list (index 0).\n", - " - Set a pointer `right` to the end of the list (index equal to the length of the list minus one).\n", - "- **Search**:\n", - " - While `left` is less than or equal to `right`:\n", - " - Calculate the middle index as `mid` by adding `left` and `right` and then dividing by 2.\n", - " - Check if the element at index `mid` in the list is equal to the target value:\n", - " - If it is, you've found the target, so return `mid`.\n", - " - If the element at index `mid` is less than the target:\n", - " - Update `left` to `mid + 1` to search in the 
right half of the list.\n", - " - If the element at index `mid` is greater than the target:\n", - " - Update `right` to `mid - 1` to search in the left half of the list.\n", - "- **Result**:\n", - " - If you've gone through the entire loop and haven't found the target, return -1 to indicate that the target is not in the list." - ] - }, - { - "cell_type": "markdown", - "id": "57327e04", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Example: Binary search (pseudo code)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "1197437b", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Element 5 found at index 4.\n" - ] - } - ], - "source": [ - "def binary_search(arr, target):\n", - " left, right = 0, len(arr) - 1\n", - "\n", - " while left <= right:\n", - " mid = (left + right) // 2 \n", - "\n", - " if arr[mid] == target:\n", - " return mid \n", - " elif arr[mid] < target:\n", - " left = mid + 1 \n", - " else:\n", - " right = mid - 1 \n", - "\n", - " return -1 \n", - "\n", - "ordered_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "target_element = 5\n", - "result = binary_search(ordered_list, target_element)\n", - "\n", - "if result != -1:\n", - " print(f\"Element {target_element} found at index {result}.\")\n", - "else:\n", - " print(f\"Element {target_element} not found in the list.\")" - ] - }, - { - "cell_type": "markdown", - "id": "08843d8f", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Sort\n", - "\n", - "> Given a list of elements, sort the elements according to a given order\n", - "\n", - "- **Ascending** order\n", - "- **Descending** order\n", - "- **Alphabetical** order\n", - "- **Reverse** order\n", - "- **Custom** order\n", - "\n", - "NB: Sort is more complex and will be studied later on." 
- ] - }, - { - "cell_type": "markdown", - "id": "879f8d7c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Many ways to sort a list " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dead2e0b", - "metadata": {}, - "outputs": [], - "source": [ - "my_list = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]\n", - "sorted_list = sorted(my_list)\n", - "print(sorted_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "01e4d112", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 9]\n" - ] - } - ], - "source": [ - "numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]\n", - "sorted(numbers) # in place operation (changes the original variable)\n", - "print(sorted_numbers)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "e2c9277a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[9, 6, 5, 5, 5, 4, 3, 3, 2, 1, 1]\n" - ] - } - ], - "source": [ - "numbers = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]\n", - "numbers.sort(reverse=True) # did not change the original variable\n", - "print(numbers)" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "id": "34975051", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['date', 'apple', 'cherry', 'banana']\n" - ] - } - ], - "source": [ - "words = ['apple', 'cherry', 'banana', 'date']\n", - "sorted_words = sorted(words, key=len)\n", - "print(sorted_words)" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "id": "7fcf7380", - "metadata": {}, - "outputs": [], - "source": [ - "import functools" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "id": "547f8c13", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['apple', 'banana', 'cherry', 'date']" - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "words = ['apple', 'cherry', 'banana', 'date']\n", - "sorted(words, key=functools.cmp_to_key(order_by_alphabetical_order))" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "id": "357d7c7a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['apple', 'banana', 'cherry', 'date']" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "words = ['apple', 'cherry', 'banana', 'date']\n", - "sorted(words, key=functools.cmp_to_key(lambda x, y: ord(x[0]) - ord(y[0])))" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "id": "90102b6a", - "metadata": {}, - "outputs": [], - "source": [ - "def order_by_alphabetical_order(a, b):\n", - " return ord(a[0]) - ord(b[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "id": "6afa72f9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "order_by_alphabetical_order(\"cherry\", \"banana\")" - ] - }, - { - "cell_type": "markdown", - "id": "ee60b8ff", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Enumerators\n", - "\n", - "> Enables to turn a list into a list of index + value\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "c1bdb416", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 1\n", - "1 2\n", - "2 3\n", - "0 1 A\n", - "1 2 B\n", - "2 3 C\n" - ] - } - ], - "source": [ - "a = [1, 2, 3]\n", - "b = [\"A\", \"B\", \"C\"]\n", - "\n", - "for index, value in enumerate(L):\n", - " print(index, value)\n", - " \n", - "for i, (x, y) in enumerate(zip(a, b)):\n", - " print(i, x, y)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "15eebc0d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "1\n", - "2\n", - "3\n" - ] - } - ], - "source": [ - "iterable = [1, 2, 3]\n", - "iterator = iter(iterable)\n", - "\n", - "try:\n", - " while True:\n", - " item = next(iterator)\n", - " print(item)\n", - "except StopIteration:\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "id": "e97201fc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Iterators" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bc13a581", - "metadata": {}, - "outputs": [], - "source": [ - "iterable = [1, 2, 3, 4, 5]\n", - "iterator = iter(iterable)\n", - "\n", - "try:\n", - " while True:\n", - " item = next(iterator)\n", - " print(item)\n", - "except StopIteration:\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "id": "8297afc9", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Generators" - ] - }, - { - "cell_type": "markdown", - "id": "269c01b9", - "metadata": {}, - "source": [ - "Using generators" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "289eb2ba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n", - "2\n", - "3\n" - ] - } - ], - "source": [ - "def my_iterator():\n", - " data = [1, 2, 3]\n", - " for item in data:\n", - " yield item\n", - "\n", - "for item in my_iterator():\n", - " print(item)" - ] - }, - { - "cell_type": "markdown", - "id": "c5bf0281", - "metadata": {}, - "source": [ - "Fibonacci with generators" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "ff445529", - "metadata": {}, - "outputs": [], - "source": [ - "def fib_generator():\n", - " a, b = 0, 1\n", - " while True:\n", - " yield a\n", - " a, b = b, a + b" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "e8aa68b2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0\n", - "1\n", - "1\n", - "2\n", - "3\n", - "5\n", 
- "8\n", - "13\n", - "21\n", - "34\n" - ] - } - ], - "source": [ - "fib = fib_generator()\n", - "for _ in range(10):\n", - " print(next(fib))" - ] - }, - { - "cell_type": "markdown", - "id": "0a882d1d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Linked list\n", - "\n", - "> A **_linked_ list** is a sequence of values (or objects) called *nodes* that are connected to each other in order to facilitate their storage and retrieval.\n", - "\n", - "\n", - "- The first node is called the head, the last node is the tail, and it points to `null`.\n", - "\n", - "- This structure allows for a flexible approach to manipulating objects: increasing their number, order, etc.\n", - "\n", - "- Especially allows for **dynamic memory allocation**, whereas an array needs to allocate all the space before being filled.\n", - "\n", - "- On the other hand, it requires linear search time (unlike arrays), and can be problematic for implementing a stack.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "53534019", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 -> 2 -> 3 -> None\n" - ] - } - ], - "source": [ - "linked_list = None\n", - "\n", - "def append(data):\n", - " global linked_list\n", - " if linked_list is None:\n", - " linked_list = {\"data\": data, \"next\": None}\n", - " else:\n", - " current = linked_list\n", - " while current[\"next\"]:\n", - " current = current[\"next\"]\n", - " current[\"next\"] = {\"data\": data, \"next\": None}\n", - "\n", - "def traverse():\n", - " current = linked_list\n", - " while current:\n", - " print(current[\"data\"], end=\" -> \")\n", - " current = current[\"next\"]\n", - " print(\"None\")\n", - "\n", - "append(1)\n", - "append(2)\n", - "append(3)\n", - "\n", - "traverse()" - ] - }, - { - "cell_type": "markdown", - "id": "28564ab2", - "metadata": { - "slideshow": { - 
"slide_type": "subslide" - } - }, - "source": [ - "## Filter\n", - "\n", - "Return a sublist given a criteri" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3012f2e3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[6, 7, 8, 9]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "x = range(10)\n", - "list(x)\n", - "list(filter(lambda x : x > 5, x))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "8de20627", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[2, 3]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[1, 2, 3][1:]" - ] - }, - { - "cell_type": "markdown", - "id": "fe92613c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Map\n", - "\n", - "Apply a function a list of values" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "435094a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(map(lambda x : x * x, x))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "f5b8712a", - "metadata": {}, - "outputs": [], - "source": [ - "L = [1, 2, 3]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "edfc966b", - "metadata": {}, - "outputs": [], - "source": [ - "L.append(4)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "f61d0ac8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 2, 3, 4]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "L" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "fe60f8fc", - "metadata": {}, - "outputs": [], - "source": [ - "L = L + [5, 
6, 7]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "e30fa183", - "metadata": {}, - "outputs": [], - "source": [ - "L.extend([2, 4, 5])" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "e96a50b2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[1, 2, 3, 4, [5, 6, 7], 2, 4, 5, 5, 6, 7, 2, 4, 5]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "L" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "bd7930ff", - "metadata": {}, - "outputs": [], - "source": [ - "L = [3, 4, 5]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "601e1691", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'L' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mL\u001b[49m\u001b[38;5;241m.\u001b[39mindex(\u001b[38;5;241m10\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'L' is not defined" - ] - } - ], - "source": [ - "L.index(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8185d9b9", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures-notebooks/07-stacks-queues.ipynb b/lectures-notebooks/03-stacks-queues.ipynb similarity 
index 75% rename from lectures-notebooks/07-stacks-queues.ipynb rename to lectures-notebooks/03-stacks-queues.ipynb index 740b4b6eae83683ab8aa69dd02070d31405d62b2..34c9e88de19a945ed9948e1f79ca246a2191ca00 100644 --- a/lectures-notebooks/07-stacks-queues.ipynb +++ b/lectures-notebooks/03-stacks-queues.ipynb @@ -10,10 +10,13 @@ }, "source": [ "# UE5 Fundamentals of Algorithms\n", - "## Lecture 7: Stacks and queues\n", + "## Lecture 3: Stacks and Queues\n", "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" + "#### [Romain Vuillemot](https://romain.vuillemot.net/)\n", + "<div style=\"text-align: center;\">\n", + " <img src=\"figures/logo-ecl.png\" style=\"width:250px; display:inline-block; vertical-align:middle;\"> \n", + " <img src=\"figures/logo-emlyon.png\" style=\"width:75px; display:inline-block; vertical-align:middle;\">\n", + "</div>" ] }, { @@ -82,7 +85,9 @@ "- `push()`: Adds an element.\n", "- `size()`: Returns the size of the list.\n", "- `reverse()`: Reverses the order of elements.\n", - "- `peek()`: Returns an element (without removing it)." + "- `peek()`: Returns an element (without removing it).\n", + "\n", + "More operations can be included." 
] }, { @@ -117,19 +122,19 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 3, "id": "6326e146", "metadata": {}, "outputs": [], "source": [ "stack = [3, 4, 5]\n", "stack.append(6) # push\n", - "stack.append(7)" + "stack.append(7) # push" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 4, "id": "6604a331", "metadata": {}, "outputs": [ @@ -148,7 +153,7 @@ "print(stack)\n", "stack.pop() # get\n", "print(stack)\n", - "stack.pop()\n", + "stack.pop() \n", "stack.pop()\n", "print(stack)\n", "print(stack[-1]) # peek" @@ -170,15 +175,17 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 5, "id": "b467ca83", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "4 3 2 1 0 " + "7 6 5 4 3 " ] } ], @@ -186,7 +193,10 @@ "import queue\n", "pile = queue.LifoQueue()\n", "\n", - "for i in range(5): pile.put(i)\n", + "for i in [3, 4, 5]: pile.put(i)\n", + "\n", + "pile.put(6)\n", + "pile.put(7)\n", "\n", "while not pile.empty(): \n", " print(pile.get(), end=\" \")" @@ -203,17 +213,18 @@ "source": [ "### Stacks (using OOP)\n", "\n", - "_Internally, will be based on an `Array` structure._" + "Internally, it is based on a `List` structure." 
] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "id": "8ae9a611", "metadata": { "slideshow": { "slide_type": "fragment" - } + }, + "tags": [] }, "outputs": [], "source": [ @@ -253,27 +264,27 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 7, "id": "c61545dc", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "A\n", - "B\n", - "C\n" + "3\n", + "4\n", + "5\n" ] } ], "source": [ - "data = [\"A\", \"B\", \"C\"]\n", - "\n", "s = Stack()\n", - "for d in data:\n", + "for d in [3, 4, 5]:\n", " s.push(d)\n", - " e = s.pop()\n", + " e = s.get()\n", " print(e)" ] }, @@ -288,9 +299,9 @@ "source": [ "## Queues\n", "\n", - "> A stack is an abstract data type that follows the First-In, First-Out (FIFO) principle\n", + "> A queue is an abstract data type that follows the First-In, First-Out (FIFO) principle\n", "\n", - "- Similar to a Srtack\n", + "- Similar to a Stack\n", "- But the returned element is the first one inserted" ] }, @@ -308,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 8, "id": "b2cc2dd0", "metadata": {}, "outputs": [], @@ -320,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 9, "id": "1157237e", "metadata": {}, "outputs": [ @@ -359,7 +370,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 11, "id": "a6392890", "metadata": {}, "outputs": [ @@ -367,7 +378,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0 1 2 3 4 " + "3 4 5 " ] } ], @@ -376,9 +387,9 @@ "\n", "q = queue.Queue()\n", "\n", - "for i in range (5): q.put(i)\n", + "for i in[3, 4, 5]: q.put(i)\n", "\n", - "while not q.empty(): \n", + "while not q.empty():\n", " print(q.get(), end=\" \")" ] }, @@ -416,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 12, "id": "6a31eba0", "metadata": {}, "outputs": [], @@ -428,7 +439,7 @@ }, { "cell_type": 
"code", - "execution_count": 47, + "execution_count": 13, "id": "0fbe9868", "metadata": {}, "outputs": [ @@ -438,7 +449,7 @@ "[1, 2, 3, 4, 4, 9, 10, 8, 7]" ] }, - "execution_count": 47, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -449,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 14, "id": "c72a1070", "metadata": {}, "outputs": [ @@ -459,7 +470,7 @@ "1" ] }, - "execution_count": 48, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -470,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 15, "id": "be80320d", "metadata": {}, "outputs": [ @@ -480,7 +491,7 @@ "[2, 4, 3, 4, 7, 9, 10, 8]" ] }, - "execution_count": 49, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -491,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 16, "id": "2aa86e2e", "metadata": {}, "outputs": [], @@ -501,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 17, "id": "5d5f290a", "metadata": {}, "outputs": [ @@ -511,7 +522,7 @@ "[2, 4, 3, 4, 7, 9, 10, 8, 5]" ] }, - "execution_count": 51, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -534,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 18, "id": "e34b0c73", "metadata": {}, "outputs": [], @@ -562,12 +573,12 @@ " min = i \n", " item = self.__queue[min] \n", " del self.__queue[min] \n", - " return item \n" + " return item " ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 19, "id": "0e326166", "metadata": { "slideshow": { @@ -591,71 +602,121 @@ "\n", "myQueue = queue.PriorityQueue()\n", "\n", - "# Insert elements into the priority queue\n", + "# insert\n", "myQueue.put(12)\n", "myQueue.put(1)\n", "myQueue.put(14)\n", "myQueue.put(7)\n", "\n", - "# Print the contents of the priority queue\n", + "# print\n", "while not 
myQueue.empty():\n", " print(myQueue.get())" ] }, { "cell_type": "markdown", - "id": "01f5f437", + "id": "2654c276", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ - " ## Improvements\n", - "\n", - "- Handle empty lists" + "## Queue as linked list" ] }, { "cell_type": "code", - "execution_count": null, - "id": "5aa5a7ad", + "execution_count": 21, + "id": "634ffa96", "metadata": {}, "outputs": [], "source": [ + "class Node:\n", + " def __init__(self, data):\n", + " self.data = data\n", + " self.next = None\n", + "\n", + "class Queue:\n", + " def __init__(self):\n", + " self.front = None \n", + " self.rear = None \n", + " self.size = 0 \n", + "\n", + " def is_empty(self):\n", + " return self.size == 0\n", + "\n", + " def push(self, data):\n", + " new_node = Node(data)\n", + " if self.rear is None:\n", + " self.front = self.rear = new_node\n", + " else:\n", + " self.rear.next = new_node\n", + " self.rear = new_node\n", + " self.size += 1\n", + "\n", + " def get(self):\n", + " if self.is_empty():\n", + " raise IndexError(\"queue is empty\")\n", + " \n", + " temp = self.front\n", + " self.front = self.front.next\n", + " \n", + " if self.front is None:\n", + " self.rear = None\n", + " \n", + " self.size -= 1\n", + " return temp.data\n", "\n", - "def dequeue(self):\n", - " if not self.is_empty():\n", - " return self.items.pop(0)\n", - " else:\n", - " raise IndexError(\"Queue is empty\")\n", - " " + " def peek(self):\n", + " if self.is_empty():\n", + " raise IndexError(\"empty queue\")\n", + " return self.front.data\n", + "\n", + " def get_size(self):\n", + " return self.size" ] }, { "cell_type": "code", - "execution_count": null, - "id": "89407366", - "metadata": {}, - "outputs": [], + "execution_count": 22, + "id": "2b365120", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "first item peek: 1\n", + "size: 3\n", + "get: 1\n", + "peek second item: 
2\n", + "size: 2\n" + ] + } + ], "source": [ - "import queue\n", - "q = queue.Queue(5000)\n", - "# q.put([1,2,3,4])\n", - "q.put([2,3,4,5])\n", - "try:\n", - " [x1, x2, x3, x4] = q.get(block=True)\n", - "except queue.Empty:\n", - " print(\"Problème : aucun élément dans la file\")\n", - "else:\n", - " print([x1, x2, x3, x4])\n", - "\n" + "queue = Queue()\n", + "queue.push(1)\n", + "queue.push(2)\n", + "queue.push(3)\n", + "\n", + "print(\"first item peek:\", queue.peek())\n", + "print(\"size:\", queue.get_size()) \n", + "\n", + "print(\"get:\", queue.get()) \n", + "print(\"peek second item:\", queue.peek()) \n", + "print(\"size:\", queue.get_size()) " ] }, { "cell_type": "code", "execution_count": null, - "id": "1bf32fdb", + "id": "0dbc4041", "metadata": {}, "outputs": [], "source": [] diff --git a/lectures-notebooks/04-05-06-programming-strategies.ipynb b/lectures-notebooks/04-05-06-programming-strategies.ipynb deleted file mode 100644 index 4c0ca2272abf686e33a3b8b3fe4a706f78f5cd6f..0000000000000000000000000000000000000000 --- a/lectures-notebooks/04-05-06-programming-strategies.ipynb +++ /dev/null @@ -1,1171 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "75778ca0", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## Lecture 4-5-6: Programming strategies\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" - ] - }, - { - "cell_type": "markdown", - "id": "f0c7488c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Outline\n", - "- Definitions of programming strategies\n", - "- Divide and conquer\n", - "- Greedy algorithms\n", - "- Dynamic programming" - ] - }, - { - "cell_type": "markdown", - "id": "b3ce5394", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - 
"source": [ - "# Programming strategies\n", - "\n", - "> A programming strategy are algorithms aimed at solving a specific problem in a precise manner.\n", - "\n", - "Examples of Strategies:\n", - "\n", - "- **Divide and Conquer:** Divide a problem into simpler sub-problems, solve the sub-problems, and then combine the solutions to solve the original problem.\n", - "\n", - "- **Dynamic Programming:** Solve a problem by breaking it down into sub-problems, calculating and memorizing the results of sub-problems to avoid unnecessary recomputation.\n", - "\n", - "- **Greedy Algorithm:** Make a series of choices that seem locally optimal at each step to find a solution, with the hope that the result will be globally optimal as well.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "bd22ed47", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# Divide and conquer\n", - "\n", - "> The **Divide and Conquer** strategy involves breaking a complex problem into smaller, similar subproblems, solving them recursively, and then combining their solutions to address the original problem efficiently.\n", - "\n", - "1. **Divide:** Divide the original problem into subproblems of the same type.\n", - "\n", - "2. **Conquer:** Solve each of these subproblems recursively.\n", - "\n", - "3. 
**Combine:** Combine the answers appropriately.\n", - "\n", - "_It is very close to the recursive approach_\n" - ] - }, - { - "cell_type": "markdown", - "id": "d12319a6", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "\n", - "### Examples of divide and conquer algorithms:\n", - "\n", - "- Binary search\n", - "- Quick sort and merge sort\n", - "- Map Reduce\n", - "- Others: Fast multiplication (Karatsuba)" - ] - }, - { - "cell_type": "markdown", - "id": "fbb1b64c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary search\n", - "\n", - "_Given a sorted list, find or insert a specific value while keeping the order._\n", - "\n", - "<img src=\"figures/recherche-dichotomique.png\" style=\"width:500px\">\n", - "\n", - "See [the notebook](03-lists-search-sort.ipynb)." - ] - }, - { - "cell_type": "markdown", - "id": "bba7c4c6", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Quick sort\n", - "\n", - "Recursive sorting algorithm which works in two steps:\n", - "\n", - "1. select a pivot element \n", - "2. partition the array around the pivot into smaller sub-arrays, then sort those sub-arrays.\n", - "\n", - "<img src=\"figures/quicksort.png\" style=\"height:400px\">" - ] - }, - { - "cell_type": "markdown", - "id": "400d3619", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Merge sort\n", - "\n", - "Divide an array recursively into two halves (split at the middle index, with no pivot value), sorting each half, and then merging the sorted halves back together. 
This process continues until the entire array is sorted.<br> Complexity: $O(n \\log(n))$.\n", - "\n", - "<img src=\"figures/tri-fusion.png\" style=\"width:500px\">" - ] - }, - { - "cell_type": "markdown", - "id": "7d33da6c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Map reduce\n", - "\n", - "Divides a large dataset into smaller chunks and processes them independently. Two main steps: \n", - "- the Map stage, where data is filtered and transformed into key-value pairs\n", - "- the Reduce stage, where data is aggregated and the final result is produced.\n", - "<img src=\"figures/Mapreduce.png\" style=\"width:700px\">" - ] - }, - { - "cell_type": "markdown", - "id": "a590fbe9", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Map reduce (without map reduce..)" - ] - }, - { - "cell_type": "markdown", - "id": "9f163e56", - "metadata": {}, - "source": [ - "_Calculate the sum of the squared values from a list of numerical values._" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "ba28ddd1", - "metadata": {}, - "outputs": [], - "source": [ - "data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "f34730fa", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[(1, 1), (4, 2), (9, 3), (16, 4), (25, 5), (36, 6), (49, 7), (64, 8), (81, 9), (100, 10)]\n", - "385\n" - ] - } - ], - "source": [ - "result = {}\n", - "for num in data:\n", - " square = num * num\n", - " result[square] = num\n", - "\n", - "final_result = list(result.items())\n", - "\n", - "print(final_result)\n", - "print(sum([x[0] for x in final_result]))" - ] - }, - { - "cell_type": "markdown", - "id": "261c7a2c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Map reduce (Python)\n", - "\n", - "1. 
Divide the problem in sub-problems\n", - "2. Apply the mapping function\n", - "3. Reduce the results" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "9ef9a5cc", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "385\n" - ] - } - ], - "source": [ - "data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "\n", - "def mapper(numbers):\n", - " result = []\n", - " for num in numbers: # calculate the squares\n", - " result.append((num, num * num))\n", - " return result\n", - "\n", - "def reducer(pairs):\n", - " result = {}\n", - " for key, value in pairs: # sums the squares\n", - " if key in result:\n", - " result[key] += value \n", - " else:\n", - " result[key] = value\n", - " return result.items()\n" - ] - }, - { - "cell_type": "markdown", - "id": "975a4b17", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Map reduce (Python)\n", - "\n", - "1. Divide the problem in sub-problems\n", - "2. Apply the mapping function\n", - "3. 
Reduce the results" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "77215ac8", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'data' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m chunk_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[0;32m----> 2\u001b[0m chunks \u001b[38;5;241m=\u001b[39m [data[i:i\u001b[38;5;241m+\u001b[39mchunk_size] \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;28mlen\u001b[39m(\u001b[43mdata\u001b[49m), chunk_size)]\n\u001b[1;32m 4\u001b[0m mapped_data \u001b[38;5;241m=\u001b[39m [mapper(chunk) \u001b[38;5;28;01mfor\u001b[39;00m chunk \u001b[38;5;129;01min\u001b[39;00m chunks] \n\u001b[1;32m 6\u001b[0m grouped_data \u001b[38;5;241m=\u001b[39m {}\u001b[38;5;66;03m# map\u001b[39;00m\n", - "\u001b[0;31mNameError\u001b[0m: name 'data' is not defined" - ] - } - ], - "source": [ - "\n", - "chunk_size = 2\n", - "chunks = [data[i:i+chunk_size] for i in range(0, len(data), chunk_size)]\n", - "\n", - "mapped_data = [mapper(chunk) for chunk in chunks] \n", - "\n", - "grouped_data = {}# map\n", - "for chunk in mapped_data:\n", - " for key, value in chunk:\n", - " if key in grouped_data:\n", - " grouped_data[key].append(value)\n", - " else:\n", - " grouped_data[key] = [value]\n", - "\n", - "reduced_data = [reducer(list(grouped_data.items()))] # reduce\n", - "result = sum([x[1][0] for x in final_result])\n", - "\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "id": "be8744c6", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Discussion on Divide and Conquer\n", - "\n", - "- 
Similarities with recursion: a problem is divided into sub-problems\n", - "\n", - "- But with a combination step (which may hold most of the code difficulty)\n", - "\n", - "- Can be implemented in a non-recursive way\n", - "\n", - "- $n \\log(n)$ complexity when the problem is split and each split is solved" - ] - }, - { - "cell_type": "markdown", - "id": "c1f7b96a", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# Greedy algorithms\n", - "\n", - "> Algorithms that make a locally optimal choice.\n", - "\n", - "### Examples:\n", - "\n", - "- Change-making problem\n", - "- Knapsack problem\n", - "- Maze solving\n", - "- Graph coloring" - ] - }, - { - "cell_type": "markdown", - "id": "17867aaf", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example: Change-making problem\n", - "\n", - "\n", - "$Q_{opt}(S,M) = min \\ \\sum_{i=1}^n x_i$.\n", - " \n", - "$S$: all the available coins\n", - " \n", - "$M$: amount\n", - " \n", - "Greedy solution:\n", - "\n", - "1. Sort the coins in descending order\n", - "\n", - "2. Initialize a variable to count coins used\n", - "\n", - "3. Subtract the number of coins used (if limited)\n", - "\n", - "4. Continue this process until amount becomes zero.\n" - ] - }, - { - "cell_type": "markdown", - "id": "adb8552d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example: Change-making problem (Python)\n", - "\n", - "_Greedy solution to return the minimal number of coins necessary._" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "id": "e900b357", - "metadata": {}, - "outputs": [], - "source": [ - "coins = [1, 2, 5]\n", - "amount = 11" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "id": "b4e91e95", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def coin_change_greedy(coins, amount):\n", - " coins.sort(reverse=True) # important! 
sort in descending order\n", - " \n", - " coin_count = 0\n", - " remaining_amount = amount\n", - " \n", - " for coin in coins:\n", - " while remaining_amount >= coin:\n", - " remaining_amount -= coin\n", - " coin_count += 1\n", - " \n", - " if remaining_amount == 0:\n", - " return coin_count\n", - " else:\n", - " return -1" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "131184bf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3\n" - ] - } - ], - "source": [ - "print(coin_change_greedy(coins, amount)) # 3 (11 = 5 + 5 + 1)" - ] - }, - { - "cell_type": "markdown", - "id": "3ab88980", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example: Change-making problem (Python)\n", - "\n", - "_Greedy solution that returns the **list of coins** used._\n", - "\n", - "\n", - "Tip: use a list with the same structure as coins." - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "9401818b", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Minimum coins needed: 6\n", - "Coins used: [2, 1, 0, 3]\n" - ] - } - ], - "source": [ - "def coin_change_greedy(coins, amount):\n", - " coins.sort(reverse=True) \n", - " \n", - " coin_count = 0\n", - " remaining_amount = amount\n", - " used_coins = [0] * len(coins)\n", - " \n", - " for i, coin in enumerate(coins):\n", - " while remaining_amount >= coin:\n", - " remaining_amount -= coin\n", - " coin_count += 1\n", - " used_coins[i] += 1 \n", - " \n", - " if remaining_amount == 0:\n", - " return coin_count, used_coins\n", - " else:\n", - " return -1, []\n", - "\n", - "coins = [25, 10, 5, 1]\n", - "amount = 63\n", - "min_coins, coins_used = coin_change_greedy(coins, amount)\n", - "\n", - "print(f\"Minimum coins needed: {min_coins}\")\n", - "print(\"Coins used:\", coins_used)" - ] - }, - { - "cell_type": "markdown", - 
"id": "1ae1863a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example: Change-making problem (Python)\n", - "\n", - "_Greedy solution that returns the **list of coins** used from **a limited availability of coins**._\n", - "\n", - "\n", - "Tip: use a list of coins availability of same structure as coins." - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "id": "8a936fe3", - "metadata": {}, - "outputs": [], - "source": [ - "coins = [25, 10, 5, 1]\n", - "amount = 63\n", - "coin_availability = [1, 2, 3, 4]" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - "id": "1700c684", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Minimum coins needed: 9\n", - "Coins used: [1, 2, 3, 3]\n" - ] - } - ], - "source": [ - "def coin_change_greedy(coins, amount, coin_availability):\n", - " coins.sort(reverse=True)\n", - "\n", - " coin_count = 0\n", - " remaining_amount = amount\n", - " used_coins = [0] * len(coins)\n", - " \n", - " for i, coin in enumerate(coins):\n", - " while remaining_amount >= coin and used_coins[i] < coin_availability[i]:\n", - " remaining_amount -= coin\n", - " coin_count += 1\n", - " used_coins[i] += 1\n", - " \n", - " if remaining_amount == 0:\n", - " return coin_count, used_coins\n", - " else:\n", - " return -1, []\n", - "\n", - "min_coins, coins_used = coin_change_greedy(coins, amount, coin_availability)\n", - "\n", - "\n", - "print(f\"Minimum coins needed: {min_coins}\")\n", - "print(\"Coins used:\", coins_used)" - ] - }, - { - "cell_type": "markdown", - "id": "7aeac877", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Discussion on Greedy algorithms\n", - "\n", - "- Often considered as an _heuristic_\n", - "- Easy to understand, implement and communicate\n", - "- They often lead to non-optimal solution" - ] - }, - { - "cell_type": "markdown", - 
"id": "5f36ec7b", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# Dynamic programming\n", - "\n", - "> **Dynamic programming** involves breaking down a problem into subproblems, *solving* these subproblems, and *combining* their solutions to obtain the solution to the original problem. The steps are as follows:\n", - "\n", - "1. Characterize the structure of an optimal solution.\n", - "2. Define the value of an optimal solution recursively.\n", - "3. Reconstruct the optimal solution from the computations.\n", - "\n", - "Notes:\n", - "- Applies to problems with optimal substructure.\n", - "- Also applies to problems where solutions are often interrelated (distinguishing it from divide and conquer).\n", - "- Utilizes a memoization approach, involving storing an intermediate solution (e.g., in a table).\n" - ] - }, - { - "cell_type": "markdown", - "id": "b125a618", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Examples of dynamic programming algorithms\n", - "\n", - "- Fibonacci Sequence\n", - "- Rod Cutting\n", - "- Sequence Alignment, Longest Subsequence Finding\n", - "- Shortest Path Finding" - ] - }, - { - "cell_type": "markdown", - "id": "3e3de7c4", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Fibonacci (reminder)\n", - "\n", - "The $n$-th number in the Fibonacci sequence is determined as follows:\n", - "\n", - "$fib(n) = fib(n-1) + fib(n-2)$, $n \\in \\mathbb{N}$\n", - "\n", - "where the sequence starts with 1, 1, and then continues as 2, 3, 5, 8, 13, 21, and so on. As an example, we want to find the 9th number ($n = 9$).\n", - "\n", - "Let's calculate the 9th Fibonacci number step by step:\n", - "\n", - "$fib(1) = 1$\n", - "\n", - "$fib(2) = 1$\n", - "\n", - "$fib(3) = fib(2) + fib(1) = 1 + 1 = 2$\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "cced97f7", - "metadata": { - "slideshow": { - "slide_type": 
"subslide" - } - }, - "source": [ - "## Fibonnacci (naive)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "7bc7701a", - "metadata": {}, - "outputs": [], - "source": [ - "def fib(n):\n", - " if n < 2:\n", - " return n\n", - " else:\n", - " return fib(n - 1) + fib(n - 2)" - ] - }, - { - "cell_type": "markdown", - "id": "81eb2956", - "metadata": {}, - "source": [ - "Call tree (for $n = 6$):" - ] - }, - { - "cell_type": "markdown", - "id": "b588ca8b", - "metadata": {}, - "source": [ - "<img src=\"figures/fibonacci-tree.png\" style=\"width:400px\">" - ] - }, - { - "cell_type": "markdown", - "id": "42b6ecd1", - "metadata": {}, - "source": [ - "Requires to calculate the same F-value multiple times." - ] - }, - { - "cell_type": "markdown", - "id": "fc74f400", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Fibonnacci (dynamic programming)\n", - "\n", - "_Optimized using a `lookup` table, which is a data structure to memoize values that have already been computed._" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "id": "9eae506b", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6-th Fibonacci number is 8\n" - ] - } - ], - "source": [ - "def fib(n, lookup):\n", - " if n == 0 or n == 1:\n", - " lookup[n] = n\n", - "\n", - " if lookup[n] is None:\n", - " lookup[n] = fib(n - 1, lookup) + fib(n - 2, lookup)\n", - "\n", - " return lookup[n]\n", - "\n", - "def main():\n", - " n = 6\n", - "\n", - " lookup = [None] * (n + 1)\n", - " result = fib(n, lookup)\n", - " print(f\"{n}-th Fibonacci number is {result}\")\n", - "\n", - "if __name__==\"__main__\": \n", - " main() " - ] - }, - { - "cell_type": "markdown", - "id": "ac5e42aa", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Rod cutting \n", - "\n", - "_Given a list of cuts and prices, identify the optimal cuts. 
Given the example below, what is the best cutting strategy for a rod of size `4`?_\n", - "\n", - "<img src=\"figures/rod-cutting.png\" style=\"width:500px\">" - ] - }, - { - "cell_type": "markdown", - "id": "2f933396", - "metadata": {}, - "source": [ - "| size (i) | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |\n", - "|--------------|---|---|---|---|---|---|---|---|\n", - "| price (pi) | 1 | 5 | 8 | 9 |10 |17 |17 |20 |\n" - ] - }, - { - "cell_type": "markdown", - "id": "47119bfb", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "source": [ - "Solution: For a rod of size `4` optimal solution is 2 cuts of size 2 so $5 + 5 = 10$." - ] - }, - { - "cell_type": "markdown", - "id": "61aa819a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Rod cutting: check a solution\n", - "\n", - "Given the previous table of size and price, check the cost of a given solution by defining a function `check_rod_cutting(prices, n)`." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "c42b61d4", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def check_rod_cutting(prices, n):\n", - " table = [0] * (n + 1)\n", - "\n", - " for i in range(1, n + 1):\n", - " max_price = float('-inf')\n", - " for j in range(1, i + 1):\n", - " max_price = max(max_price, prices[j] + table[i - j])\n", - " table[i] = max_price\n", - "\n", - " return table[n]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "9a3bd3d3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The maximum total price for a rod of length 2 is 5\n" - ] - } - ], - "source": [ - "prices = [0, 1, 5, 8, 9, 10, 17, 17, 20]\n", - "n = 2\n", - "\n", - "max_total_price = check_rod_cutting(prices, n)\n", - "print(f\"The maximum total price for a rod of length {n} is {max_total_price}\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "151c0a39", - "metadata": 
{ - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Rod cutting (brute force)\n", - "\n", - "Let's solve the rod cutting problem using a brute force (naive) approach.\n", - "\n", - "1. define a value function\n", - "2. identify a base case\n", - "3. identify a recursion mechanism" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "6e7a3a0d", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def cut_brute_force(n, t):\n", - " if n == 0:\n", - " return 0\n", - " max_valeur = float('-inf')\n", - " for i in range(1, n + 1):\n", - " valeur_courante = t[i] + coupe_brute_force(n - i, t)\n", - " max_valeur = max(max_valeur, valeur_courante)\n", - " return max_valeur" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "1ee43707", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The maximum value for a rod of length 2 is 5.\n" - ] - } - ], - "source": [ - "lengths = [0, 1, 2, 3, 4, 5, 6, 7, 8]\n", - "values = [0, 1, 5, 8, 9, 10, 17, 17, 20]\n", - "rod_length = 2\n", - "max_value = coupe_brute_force(rod_length, values)\n", - "print(f\"The maximum value for a rod of length {rod_length} is {max_value}.\")" - ] - }, - { - "cell_type": "markdown", - "id": "1596f825", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Rod cutting (dynamic programming)\n", - "\n", - "\n", - "General case:\n", - "\n", - "- Cutting a rod of length $i$ optimally.\n", - "- Cutting a rod of length $(n - i)$ optimally.\n", - "\n", - "\n", - "<img src=\"figures/rod-cutting-tree.png\" style=\"width:500px\">\n", - "\n", - "General case: $V_{n} = max_{1 \\leq i \\leq n} (p_i + V_{n - i})$ " - ] - }, - { - "cell_type": "markdown", - "id": "be5d9a15", - "metadata": {}, - "source": [ - "| size (i) | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |\n", - 
"|--------------|---|---|---|---|---|---|---|---|\n", - "| price (pi) | 1 | 5 | 8 | 9 |10 |17 |17 |20 |\n" - ] - }, - { - "cell_type": "markdown", - "id": "b2845bd5", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "$V_{3} = \\max_{1 \\leq i \\leq 3} (p_i + V_{3 - i})$\n", - "\n", - "Let's calculate `V_3` step by step for each possible value of `i`:\n", - "\n", - "1. If `i = 1`, we cut the rod into two pieces: one of length 1 and one of length 2.\n", - " - $V_1 = p_1 = 1$\n", - " - $V_{3 - 1} = V_2$\n", - "\n", - "2. If `i = 2`, we cut the rod into two pieces: one of length 2 and one of length 1.\n", - " - $V_2 = p_2 = 5$\n", - " - $V_{3 - 2} = V_1$\n", - "\n", - "3. If `i = 3`, we cut the rod into one piece of length 3.\n", - " - $V_3 = p_3 = 8$\n", - " - $V_{3 - 3} = V_0$ (Assuming that $V_0 = 0$ as a base case.)" - ] - }, - { - "cell_type": "markdown", - "id": "eb092761", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "Now, we can calculate the values for `V_2` and `V_1` recursively using the same formula:\n", - "\n", - "For `V_2`:\n", - "$$V_2 = \\max(p_1 + V_1, p_2 + V_0) = \\max(1 + V_1, 5 + 0) = \\max(1 + 1, 5 + 0) = \\max(2, 5) = 5$$\n", - "\n", - "For `V_1`:\n", - "$$V_1 = \\max(p_1 + V_0) = \\max(1 + 0) = 1$$\n", - "\n", - "So, `V_2` is 5 and `V_1` is 1.\n", - "\n", - "Now, we can calculate `V_3` using the values of `V_2` and `V_1`:\n", - "\n", - "$$V_3 = \\max(p_1 + V_2, p_2 + V_1, p_3 + V_0) = \\max(1 + 5, 5 + 1, 8 + 0) = \\max(6, 6, 8) = 8$$" - ] - }, - { - "cell_type": "markdown", - "id": "0e097cbf", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Rod cutting (dynamic programming)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "5812781c", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Max size cut 22 8\n" - ] - } 
- ], - "source": [ - "INT_MIN = 0\n", - "\n", - "def cutRod(price, n): \n", - "\n", - " # init cache tables\n", - " val = [0 for x in range(n+1)] \n", - " val[0] = 0\n", - " \n", - " for i in range(1, n+1): \n", - " max_val = INT_MIN \n", - " for j in range(i): \n", - " max_val = max(max_val, price[j] + val[i-j-1]) \n", - " val[i] = max_val \n", - " \n", - " return val[n] \n", - " \n", - "if __name__==\"__main__\": \n", - " arr = [1, 5, 8, 9, 10, 17, 17, 20] \n", - " size = len(arr) \n", - " print(\"Max size cut \" + str(cutRod(arr, size)), len(arr) ) " - ] - }, - { - "cell_type": "markdown", - "id": "c42683cc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Change-making problem (dynamic programming)\n", - "\n", - "\n", - "$Q_{opt}(S,M) = min \\ \\sum_{i=1}^n x_i$.\n", - " \n", - "$S$: all the available coins\n", - " \n", - "$M$: amount\n", - " \n", - "$\n", - " Q_{opt}(i,m) = min\n", - "\\begin{cases}\n", - " 1 + Q_{opt}(i, m - v_i) \\quad si \\ (m - v_i) \\geq 0 \\qquad \\text{we use a coin of type $i$ of value $v_i$}\\\\\n", - "Q_{opt}(i-1, m) \\qquad \\quad si \\ i \\geq 1 \\qquad \\qquad \\quad \\text{we do not use coin of type $i$, \n", - "we use $i-1$}\n", - "\\end{cases}\n", - "$\n", - "\n", - "<img src=\"figures/coins-changing.png\" style=\"width:500px\">\n" - ] - }, - { - "cell_type": "markdown", - "id": "fd3e3af7", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Lessons on dynamic programming\n", - "\n", - "- It is necessary to study each problem on a case-by-case basis.\n", - "\n", - "- Storing a large number of partial results, which requires significant memory usage.\n", - "\n", - "- Suitable for only certain problems (min, max, counting the number of solutions).\n" - ] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures-notebooks/08-binary-trees.ipynb b/lectures-notebooks/08-binary-trees.ipynb deleted file mode 100644 index d4d90d26415f3e39bc98129501b221917e50f636..0000000000000000000000000000000000000000 --- a/lectures-notebooks/08-binary-trees.ipynb +++ /dev/null @@ -1,995 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "09fc003e", - "metadata": {}, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## Lecture 8: Binary trees\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" - ] - }, - { - "cell_type": "markdown", - "id": "74743087", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "93dd9eb2", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "\n", - "#sys.path.append(\"../../\")\n", - "from utils import draw_directed_graph" - ] - }, - { - "cell_type": "markdown", - "id": "f3ebe7d2", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Outline\n", - "- Definitions\n", - "- Data structures\n", - "- Basic operations\n", - "- Properties" - ] - }, - { - "cell_type": "markdown", - "id": "a4973a08", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Definitions\n", - "\n", - "> A **Tree** is a hierarchical data structure with nodes (vertex) connected by links (edge)\n", - "\n", - "- A non-linear data structures (multiple ways to traverse it)\n", - "- Nodes 
are connected by only one path (a series of edges) so trees have no cycle\n", - "- Edges are also called links, they can be traversed in both ways (no orientation)\n", - "- Trees are most commonly represented as a node-lin diagram, with the root at the top and the leaves (nodes without children) at the bottom)." - ] - }, - { - "cell_type": "markdown", - "id": "28bb09dc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary trees\n", - "\n", - "We focus on _binary trees._\n", - "\n", - "> Trees that have at most two children\n", - "\n", - "- Children are ordered (left and right)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "51f0cf57", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", - " -->\n", - "<!-- Pages: 1 -->\n", - "<svg width=\"134pt\" height=\"116pt\"\n", - " viewBox=\"0.00 0.00 134.00 116.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 112)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-112 130,-112 130,4 -4,4\"/>\n", - "<!-- A -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>A</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"63\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"63\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">A</text>\n", - "</g>\n", - "<!-- B -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>B</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"27\" 
y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">B</text>\n", - "</g>\n", - "<!-- A->B -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>A->B</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M54.65,-72.76C50.42,-64.55 45.19,-54.37 40.42,-45.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"43.68,-43.79 36,-36.49 37.46,-46.99 43.68,-43.79\"/>\n", - "</g>\n", - "<!-- C -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>C</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">C</text>\n", - "</g>\n", - "<!-- A->C -->\n", - "<g id=\"edge2\" class=\"edge\">\n", - "<title>A->C</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M71.35,-72.76C75.58,-64.55 80.81,-54.37 85.58,-45.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"88.54,-46.99 90,-36.49 82.32,-43.79 88.54,-46.99\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x1040b0f40>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "T = {\n", - " 'A': ['B', 'C'],\n", - "}\n", - "\n", - "draw_directed_graph(T)" - ] - }, - { - "cell_type": "markdown", - "id": "55e54f01", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary trees data structures\n", - "\n", - "Binary trees can be stored in multiple ways\n", - "\n", - "- The first element is the value of the node.\n", - "- The second element is the left subtree.\n", - "- The third element is the right subtree.\n", - "\n", - "Here are examples:\n", - "\n", - "- Adjacency list `T = {'A': ['B', 'C']}`\n", - "- Arrays `[\"A\", \"B\"]`\n", - "- Class / Object-oriented programming `Class Node()`\n", - "\n", - "Other are possible: using linked list, modules, etc.\n", - "\n", - "Adjacency lists are the most common ways and can be achieved in 
multiple fashions." - ] - }, - { - "cell_type": "markdown", - "id": "30faa950", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary trees data structures (dict + lists)\n", - "\n", - "_Binary trees using dictionnaries where nodes are keys and edges are Lists._" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d495c8a5", - "metadata": {}, - "outputs": [], - "source": [ - "T = {\n", - " 'A' : ['B','C'],\n", - " 'B' : ['D', 'E'],\n", - " 'C' : [],\n", - " 'D' : [],\n", - " 'E' : []\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "b2b2c183", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Using OOP" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "5df1c518", - "metadata": {}, - "outputs": [], - "source": [ - "class Node:\n", - " def __init__(self, value):\n", - " self.value = value\n", - " self.left = None\n", - " self.right = None\n", - " \n", - " def get_value(self):\n", - " return self.value\n", - " \n", - " def set_value(self, v = None):\n", - " self.value = v" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "abc855b1", - "metadata": {}, - "outputs": [], - "source": [ - "root = Node(4)\n", - "root.left = Node(2)\n", - "root.right = Node(5)\n", - "root.left.left = Node(1)\n", - "root.left.right = Node(3)" - ] - }, - { - "cell_type": "markdown", - "id": "8b8ec2a0", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Definitions on binary trees\n", - "\n", - "`Nodes` - a tree is composed of nodes that contain a `value` and `children`.\n", - "\n", - "`Edges` - are the connections between nodes; nodes may contain a value.\n", - "\n", - "`Root` - the topmost node in a tree; there can only be one root.\n", - "\n", - "`Parent and child` - each node has a single parent and up to two children.\n", - "\n", - "`Leaf` - no node below that node.\n", - "\n", - "`Depth` - the number of edges 
on the path from the root to that node.\n", - "\n", - "`Height` - maximum depth in a tree." - ] - }, - { - "cell_type": "markdown", - "id": "b0bb3608", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Basic operations" - ] - }, - { - "cell_type": "markdown", - "id": "8726ff36", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Get the root of a tree\n", - "\n", - "_Return the topmost node in a tree (there can only be one root)._" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "1fbb1c2f", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def get_root(T):\n", - " if (len(T.keys()) > 0):\n", - " return list(T.keys())[0]\n", - " else:\n", - " return -1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "6b4492dc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'A'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_root(T)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "ea01e802", - "metadata": {}, - "outputs": [], - "source": [ - "assert get_root({}) == -1\n", - "assert get_root({\"A\": []}) == \"A\"\n", - "assert isinstance(get_root({\"A\": []}), str) # to make sure there is only 1 root (eg not a list)" - ] - }, - { - "cell_type": "markdown", - "id": "3ffffeda", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Get all nodes from a Tree\n", - "\n", - "_Return all the nodes in the tree (as a list of nodes names)._" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3af082b7", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def get_nodes(T):\n", - " return list(T.keys())" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "ede5b5f4", - "metadata": {}, - "outputs": 
[ - { - "data": { - "text/plain": [ - "['A', 'B', 'C', 'D', 'E']" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_nodes(T)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "2d3305d5", - "metadata": {}, - "outputs": [], - "source": [ - "assert get_nodes(T) == ['A', 'B', 'C', 'D', 'E']\n", - "assert get_nodes({}) == []" - ] - }, - { - "cell_type": "markdown", - "id": "db9c925d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Get all links from a Tree\n", - "\n", - "_Return all the links as a list of pairs as `Tuple`._" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "b50fe9c2", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def get_links(tree):\n", - " links = []\n", - " for node, neighbors in tree.items():\n", - " for neighbor in neighbors:\n", - " links.append((node, neighbor))\n", - " return links" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "8958bd83", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E')]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_links(T)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "30dd31d3", - "metadata": {}, - "outputs": [], - "source": [ - "assert get_links(T) == [('A', 'B'), ('A', 'C'), ('B', 'D'), ('B', 'E')]\n", - "assert get_links({}) == []" - ] - }, - { - "cell_type": "markdown", - "id": "95accba5", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Get the parent of a node\n", - "\n", - "_Return the parent node of a given node (and `None` if the node is the root or has no parent in the tree)._" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "37fbe31b", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, -
"outputs": [], - "source": [ - "def get_parent(tree, node_to_find):\n", - " for parent, neighbors in tree.items():\n", - " if node_to_find in neighbors:\n", - " return parent\n", - " return None" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "78e88d23", - "metadata": {}, - "outputs": [], - "source": [ - "assert get_parent(T, 'D') == 'B'\n", - "assert get_parent(T, 'A') is None\n", - "assert get_parent({}, '') is None" - ] - }, - { - "cell_type": "markdown", - "id": "3fb6f347", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Check if the node is the root\n", - "\n", - "_Return `True` if the node is the root (it has no parent), else `False`._" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "164e4ef7", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def is_root(T, node):\n", - " return get_parent(T, node) is None" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "5c053617", - "metadata": {}, - "outputs": [], - "source": [ - "assert is_root(T, 'A') == True" - ] - }, - { - "cell_type": "markdown", - "id": "bba64730", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Get the children of a node\n", - "\n", - "_Given a node, return all its children as a `List`._" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "ac145c20", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def find_children(graph, parent_node):\n", - " children = graph.get(parent_node, [])\n", - " return children" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "9444a66f", - "metadata": {}, - "outputs": [], - "source": [ - "assert find_children(T, 'A') == ['B', 'C']\n", - "assert find_children(T, 'B') == ['D', 'E']\n", - "assert find_children(T, 'C') == []" - ] - }, - { - "cell_type": "markdown", - "id": "6f600f3d", - "metadata": { - "slideshow":
{ - "slide_type": "fragment" - } - }, - "source": [ - "### Check if the node is a leaf\n", - "\n", - "_Return `True` if the node has no children._" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "77f5f17c", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def is_leaf(T, node):\n", - " return len(find_children(T, node)) == 0" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "5f5078d6", - "metadata": {}, - "outputs": [], - "source": [ - "assert is_leaf(T, 'C') \n", - "assert not is_leaf(T, 'A')" - ] - }, - { - "cell_type": "markdown", - "id": "a41e666e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Add/Delete a node\n", - "\n", - "_Given a tree as input._\n", - "\n", - "- Add a node to a given parent\n", - "\n", - "- Remove a given node" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "c9312a43", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def add_node(graph, parent, new_node):\n", - " if parent in graph:\n", - " graph[parent].append(new_node)\n", - " else:\n", - " graph[parent] = [new_node]\n", - "\n", - "def delete_node(graph, node_to_delete):\n", - " for parent, children in graph.items():\n", - " if node_to_delete in children:\n", - " children.remove(node_to_delete)\n", - " if not children:\n", - " del graph[parent]" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "38181a0f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'A': ['F']}" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "U = {\"A\": []}\n", - "add_node(U, \"A\", 'F')\n", - "U" - ] - }, - { - "cell_type": "markdown", - "id": "4885c320", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Height of a tree\n", - "\n", - "_Calculate the
longest path from the root to leaves. Tip: use a recursive approach_\n", - "\n", - "- if the node is a leaf, return 1\n", - "- for a current node, the height is the max height of its children + 1" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "6ef9af29", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def height(T, node):\n", - " if node not in T:\n", - " return 0 # leaf\n", - " children = T[node]\n", - " if not children:\n", - " return 1 # leaf \n", - " list_heights = []\n", - " for child in children:\n", - " list_heights.append(height(T, child))\n", - " return 1 + max(list_heights)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "44a54ec8", - "metadata": {}, - "outputs": [], - "source": [ - "assert height(T, 'A') == 3\n", - "assert height(T, 'B') == 2\n", - "assert height(T, 'C') == 1" - ] - }, - { - "cell_type": "markdown", - "id": "e35608be", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Height of a binary tree\n", - "\n", - "\n", - "<img src=\"figures/hauteur-arbre.png\" style=\"width: 400px\">\n", - "\n", - "$n = 2^{(h+1)} - 1$\n", - "\n", - "$n + 1 = 2^{(h+1)}$\n", - "\n", - "$log(n + 1) = log(2^{(h+1)})$\n", - "\n", - "$log(n + 1) = (h+1) log(2)$\n", - "\n", - "$log(n + 1) / log(2) = h + 1$\n", - "\n", - "so $h = log(n + 1) / log(2) - 1$\n", - "\n", - "$h$ is equivalent to $log(n)$" - ] - }, - { - "cell_type": "markdown", - "id": "94f34cf8", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary trees (using Arrays)\n", - "\n", - "\n", - "<img src=\"figures/arbre-tableau.png\" style=\"width: 400px\">\n", - "\n", - "\n", - "In a complete or balanced binary tree: \n", - "- if the index of a node is equal to $i$, then the position indicating its left child is at $2i$, \n", - "- and the position indicating its right child is at $2i + 1$.\n", - "\n", - "Also works for ternary 
trees, etc." - ] - }, - { - "cell_type": "markdown", - "id": "8afab007", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Visualize a tree" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "610ad3bb", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", - " -->\n", - "<!-- Pages: 1 -->\n", - "<svg width=\"152pt\" height=\"188pt\"\n", - " viewBox=\"0.00 0.00 152.00 188.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-184 148,-184 148,4 -4,4\"/>\n", - "<!-- 0 -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>0</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-162\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">0</text>\n", - "</g>\n", - "<!-- 1 -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>1</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"72\" cy=\"-90\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"72\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">1</text>\n", - "</g>\n", - "<!-- 0->1 -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>0->1</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M92.74,-144.76C89.64,-136.72 85.81,-126.81 82.3,-117.69\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"85.64,-116.63 78.77,-108.56 79.11,-119.15 85.64,-116.63\"/>\n", - "</g>\n", - "<!-- 2 -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>2</title>\n", - "<ellipse 
fill=\"none\" stroke=\"black\" cx=\"126\" cy=\"-90\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"126\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">2</text>\n", - "</g>\n", - "<!-- 0->2 -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>0->2</title>\n", - "<path fill=\"none\" stroke=\"red\" d=\"M105.26,-144.76C108.36,-136.72 112.19,-126.81 115.7,-117.69\"/>\n", - "<polygon fill=\"red\" stroke=\"red\" points=\"118.89,-119.15 119.23,-108.56 112.36,-116.63 118.89,-119.15\"/>\n", - "</g>\n", - "<!-- 4 -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>4</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"18\" cy=\"-18\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"18\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">4</text>\n", - "</g>\n", - "<!-- 1->4 -->\n", - "<g id=\"edge2\" class=\"edge\">\n", - "<title>1->4</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M61.33,-75.17C54.01,-65.68 44.12,-52.86 35.64,-41.86\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"38.45,-39.78 29.57,-34 32.91,-44.06 38.45,-39.78\"/>\n", - "</g>\n", - "<!-- 5 -->\n", - "<g id=\"node6\" class=\"node\">\n", - "<title>5</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"72\" cy=\"-18\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"72\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">5</text>\n", - "</g>\n", - "<!-- 1->5 -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>1->5</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M72,-71.7C72,-64.41 72,-55.73 72,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"75.5,-47.62 72,-37.62 68.5,-47.62 75.5,-47.62\"/>\n", - "</g>\n", - "<!-- 3 -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>3</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"126\" cy=\"-18\" rx=\"18\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"126\" y=\"-14.3\" font-family=\"Times,serif\" 
font-size=\"14.00\">3</text>\n", - "</g>\n", - "<!-- 2->3 -->\n", - "<g id=\"edge5\" class=\"edge\">\n", - "<title>2->3</title>\n", - "<path fill=\"none\" stroke=\"red\" d=\"M126,-71.7C126,-64.41 126,-55.73 126,-47.54\"/>\n", - "<polygon fill=\"red\" stroke=\"red\" points=\"129.5,-47.62 126,-37.62 122.5,-47.62 129.5,-47.62\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x1040b1000>" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from graphviz import Digraph\n", - "\n", - "dot = Digraph()\n", - "\n", - "dot.node_attr['shape'] = 'circle'\n", - "\n", - "dot.node('0', label='0') # Root\n", - "dot.node('1')\n", - "dot.node('2')\n", - "dot.node('3')\n", - "dot.node('4')\n", - "dot.node('5')\n", - "\n", - "dot.edge('0', '1')\n", - "dot.edge('1', '4')\n", - "dot.edge('1', '5')\n", - "\n", - "dot.edge('0', '2', color='red')\n", - "dot.edge('2', '3', color='red')\n", - "\n", - "dot # render" - ] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures-notebooks/09-binary-trees-traversals.ipynb b/lectures-notebooks/09-binary-trees-traversals.ipynb deleted file mode 100644 index 7a5bf682d4771ec18edcd9a45057b74ad5ecd700..0000000000000000000000000000000000000000 --- a/lectures-notebooks/09-binary-trees-traversals.ipynb +++ /dev/null @@ -1,748 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a4973a08", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## 
Lecture 9: Binary trees traversals\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" - ] - }, - { - "cell_type": "markdown", - "id": "01f6da4e", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "c30e8f1b", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "from graphviz import Digraph\n", - "from IPython.display import display\n", - "from utils import draw_binary_tree" - ] - }, - { - "cell_type": "markdown", - "id": "6efad77c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Outline\n", - "\n", - "- Traversal methods\n", - "- Depth first\n", - "- Breadth first" - ] - }, - { - "cell_type": "markdown", - "id": "9fc3736a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Binary trees traversal methods\n", - "\n", - "> Methods to explore and process nodes in a tree (or a graph).\n", - "\n", - "- Because Trees are non-linear, there are multiple possible paths\n", - "- Can be applied to the whole tree or until a certain condition is met\n", - "- Traversal methods will provide very different results" - ] - }, - { - "cell_type": "markdown", - "id": "7946fdbc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Two main traversal strategies:\n", - "\n", - "<img src=\"figures/arbre-largeur-hauteur.png\" style=\"width: 400px\">\n", - "\n", - "1. **Depth-First search (DFS):**\n", - " - visiting a node (starting with the root)\n", - " - then recursively traversing as deep as possible \n", - " - then explore another branch.\n", - "\n", - "\n", - "2.
**Breadth-First search (BFS):**\n", - " - visiting a node (starting with the root)\n", - " - explore all its neighbors (children) \n", - " - then move on to the children of those children." - ] - }, - { - "cell_type": "markdown", - "id": "66c57c10", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first search (or traversal)\n", - "\n", - "> **Depth-first search (DFS)** is a traversal method that follows each branch of a tree (or a graph) as deep as possible before backtracking.\n", - "\n", - "1. Place the source node in a **stack**.\n", - "2. Remove the node from the top of the stack for processing.\n", - "3. Add all unexplored neighbors to the stack (at the top).\n", - "4. If the stack is not empty, go back to step 2.\n" - ] - }, - { - "cell_type": "markdown", - "id": "6e0cf5f3", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first search (or traversal)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "e6d32fe3", - "metadata": {}, - "outputs": [], - "source": [ - "def dfs(tree, start):\n", - " stack = [start]\n", - " while stack:\n", - " vertex = stack.pop()\n", - " print(vertex, end = ' ') # traitement\n", - " stack.extend(tree[vertex])" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "c49f305d", - "metadata": {}, - "outputs": [], - "source": [ - "tree = {'A': set(['B', 'C']),\n", - " 'B': set(['D', 'E', 'F']),\n", - " 'C': set([]),\n", - " 'D': set([]),\n", - " 'E': set([]),\n", - " 'F': set([])\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "b825ca4a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A B F E D C " - ] - } - ], - "source": [ - "dfs(tree, 'A') # A B D F E C" - ] - }, - { - "cell_type": "markdown", - "id": "da082c74", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first search: pre-order, in-order, and post-order.\n", - "\n", - "For
**depth-first search**, there are different types of processing: *pre-order*, *in-order*, and *post-order*, based on when the root is processed relative to its children (before, between, or after exploring the subtrees). Notation:\n", - "\n", - "- R = Root\n", - "- D = Right subtree (from the French *droite*)\n", - "- G = Left subtree (from the French *gauche*)\n", - "\n", - "There are three (main) types of traversal:\n", - "\n", - "- **Pre-order**: R G D\n", - "- **In-order**: G R D\n", - "- **Post-order**: G D R\n" - ] - }, - { - "cell_type": "markdown", - "id": "f0c20b5a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first traversal: pre-order, in-order, and post-order.\n", - "\n", - "Implementation of the strategies:\n", - "\n", - "```python\n", - "def preorder(R):\n", - " if not empty(R):\n", - " process(R) # Root\n", - " preorder(left(R)) # Left\n", - " preorder(right(R)) # Right\n", - "\n", - "def inorder(R):\n", - " if not empty(R):\n", - " inorder(left(R)) # Left\n", - " process(R) # Root\n", - " inorder(right(R)) # Right\n", - "\n", - "def postorder(R):\n", - " if not empty(R):\n", - " postorder(left(R)) # Left\n", - " postorder(right(R)) # Right\n", - " process(R) # Root\n" - ] - }, - { - "cell_type": "markdown", - "id": "6d493663", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example\n", - "\n", - "We will use this data structure" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "c0785d33", - "metadata": {}, - "outputs": [], - "source": [ - "class Node:\n", - " def __init__(self, value):\n", - " self.value = value\n", - " self.left = None\n", - " self.right = None\n", - " \n", - " def get_value(self):\n", - " return self.value\n", - " \n", - " def set_value(self, v = None):\n", - " self.value = v" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ffcda583", - "metadata": {}, - "outputs": [], - "source": [ - "root = Node(\"dog\")\n", - "root.left = Node(\"little\")\n", - "root.left.left =
Node(\"the\")\n", - "root.right = Node(\"very\")\n", - "root.right.left = Node(\"is\")\n", - "root.right.right = Node(\"cute\")" - ] - }, - { - "cell_type": "markdown", - "id": "5389c181", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Example\n", - "\n", - "_How to get the sentence in the correct order?_" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "8cf38ed1", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", - " -->\n", - "<!-- Pages: 1 -->\n", - "<svg width=\"206pt\" height=\"188pt\"\n", - " viewBox=\"0.00 0.00 206.00 188.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-184 202,-184 202,4 -4,4\"/>\n", - "<!-- dog -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>dog</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"63\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"63\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">dog</text>\n", - "</g>\n", - "<!-- little -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>little</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"27\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">little</text>\n", - "</g>\n", - "<!-- dog->little -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>dog->little</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M54.65,-144.76C50.42,-136.55 45.19,-126.37 40.42,-117.09\"/>\n", - "<polygon fill=\"black\" 
stroke=\"black\" points=\"43.68,-115.79 36,-108.49 37.46,-118.99 43.68,-115.79\"/>\n", - "</g>\n", - "<!-- very -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>very</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">very</text>\n", - "</g>\n", - "<!-- dog->very -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>dog->very</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M71.35,-144.76C75.58,-136.55 80.81,-126.37 85.58,-117.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"88.54,-118.99 90,-108.49 82.32,-115.79 88.54,-118.99\"/>\n", - "</g>\n", - "<!-- the -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>the</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">the</text>\n", - "</g>\n", - "<!-- little->the -->\n", - "<g id=\"edge2\" class=\"edge\">\n", - "<title>little->the</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M27,-71.7C27,-64.41 27,-55.73 27,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"30.5,-47.62 27,-37.62 23.5,-47.62 30.5,-47.62\"/>\n", - "</g>\n", - "<!-- is -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>is</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">is</text>\n", - "</g>\n", - "<!-- very->is -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>very->is</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M99,-71.7C99,-64.41 99,-55.73 99,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"102.5,-47.62 99,-37.62 95.5,-47.62 102.5,-47.62\"/>\n", - "</g>\n", - "<!-- cute -->\n", - "<g id=\"node6\" 
class=\"node\">\n", - "<title>cute</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"171\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">cute</text>\n", - "</g>\n", - "<!-- very->cute -->\n", - "<g id=\"edge5\" class=\"edge\">\n", - "<title>very->cute</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M113.92,-74.5C123.77,-64.92 136.86,-52.19 148.03,-41.34\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"150.29,-44.02 155.02,-34.54 145.41,-39 150.29,-44.02\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x106e7ec50>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "draw_binary_tree(root)" - ] - }, - { - "cell_type": "markdown", - "id": "93045f0c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first traversal pre-order (OOP + iterative)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "b0d812d9", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "def iterative_inorder_traversal(node):\n", - " stack = [node]\n", - " while stack:\n", - " current_node = stack.pop()\n", - " print(current_node.value)\n", - " if current_node.right:\n", - " stack.append(current_node.right)\n", - " if current_node.left:\n", - " stack.append(current_node.left)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "b2dc9113", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dog\n", - "little\n", - "the\n", - "very\n", - "is\n", - "cute\n" - ] - } - ], - "source": [ - "iterative_inorder_traversal(root)" - ] - }, - { - "cell_type": "markdown", - "id": "f368960e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Depth-first traversal pre-order (dict + recursive)\n", - 
"_Recursive implementation using a dictionnary data structure._" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "552dc46b", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "TT = {\"dog\": [\"little\", \"very\"],\n", - " \"little\": [\"the\"],\n", - " \"the\": [],\n", - " \"very\": [\"is\", \"cute\"],\n", - " \"is\": [],\n", - " \"cute\": []\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "2cafcf1c", - "metadata": {}, - "outputs": [], - "source": [ - "def preorder(T, node):\n", - " if node is not None:\n", - " print(node)\n", - " if len(T[node]) > 0:\n", - " preorder(T, T[node][0])\n", - " if len(T[node]) > 1:\n", - " preorder(T, T[node][1])" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "8b8f5c52", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dog\n", - "little\n", - "the\n", - "very\n", - "is\n", - "cute\n" - ] - } - ], - "source": [ - "preorder(TT, \"dog\")" - ] - }, - { - "cell_type": "markdown", - "id": "75c93b72", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "_Iterative version._" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "fbbb9408", - "metadata": {}, - "outputs": [], - "source": [ - "def preorder_traversal(T, node):\n", - " stack = [node]\n", - " \n", - " while stack:\n", - " current_node = stack.pop()\n", - " print(current_node)\n", - " \n", - " if len(T[current_node]) > 1:\n", - " stack.append(T[current_node][1])\n", - " \n", - " if len(T[current_node]) > 0:\n", - " stack.append(T[current_node][0])" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "f040425d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "dog\n", - "little\n", - "the\n", - "very\n", - "is\n", - "cute\n" - ] - } - ], - "source": [ - "preorder_traversal(TT, \"dog\")" - ] - }, 
- { - "cell_type": "markdown", - "id": "1aeacaad", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Solution: inorder traversal" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "8a950f0f", - "metadata": {}, - "outputs": [], - "source": [ - "def inorder(T, node):\n", - " if node is not None:\n", - " if len(T[node]) > 0:\n", - " inorder(T, T[node][0])\n", - " print(node)\n", - " if len(T[node]) > 1:\n", - " inorder(T, T[node][1])" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "49625d32", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "the\n", - "little\n", - "dog\n", - "is\n", - "very\n", - "cute\n" - ] - } - ], - "source": [ - "inorder(TT, \"dog\")" - ] - }, - { - "cell_type": "markdown", - "id": "c88e5f58", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Breadth-first search (or traversal)\n", - "\n", - "> **Breadth-first search (BFS)** is a traversal method that visits all the nodes in a tree (or a graph) level by level.\n", - "\n", - "\n", - "```\n", - " 1\n", - " / \\\n", - " 2 3\n", - " / \\\n", - " 4 5\n", - "```\n", - "\n", - "The main difference will be that we use a Queue instead of a Stack" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "73ceb6d8", - "metadata": {}, - "outputs": [], - "source": [ - "def bfs_print(node):\n", - " if node is None:\n", - " return\n", - "\n", - " queue = [node]\n", - "\n", - " while queue:\n", - " current_node = queue.pop(0)\n", - " print(current_node.value, end=' ')\n", - "\n", - " if current_node.left:\n", - " queue.append(current_node.left)\n", - "\n", - " if current_node.right:\n", - " queue.append(current_node.right)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "1e1a1f21", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [], - "source": [ - "root = Node(1)\n", - 
"root.left = Node(2)\n", - "root.right = Node(3)\n", - "root.left.left = Node(4)\n", - "root.left.right = Node(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "74a2317d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 2 3 4 5 " - ] - } - ], - "source": [ - "bfs_print(root)" - ] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures-notebooks/10-trees.ipynb b/lectures-notebooks/10-trees.ipynb deleted file mode 100644 index b7ce4b33120817b4487e0fc8fbc1ca18c9ad06df..0000000000000000000000000000000000000000 --- a/lectures-notebooks/10-trees.ipynb +++ /dev/null @@ -1,1370 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "09fc003e", - "metadata": {}, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## Lecture 10: Trees\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" - ] - }, - { - "cell_type": "markdown", - "id": "74743087", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a9f48d96", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "from graphviz import Digraph\n", - "from IPython.display import display\n", - "from utils import draw_tree_dict" - ] - }, - { - "cell_type": "markdown", - "id": "f3ebe7d2", - "metadata": { - "slideshow": { - "slide_type": 
"subslide" - } - }, - "source": [ - "## Outline\n", - "- Definitions\n", - "- Data structures\n", - "- Weighted trees" - ] - }, - { - "cell_type": "markdown", - "id": "a4973a08", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Trees\n", - "\n", - "\n", - "> Tree is a hierarchical data structure with nodes connected by edges\n", - "\n", - "- A non-linear data structures (multiple ways to traverse it)\n", - "- Nodes are connected by only one path (a series of edges) so trees have no cycle\n", - "- Edges are also called links, they can be traversed in both ways (no orientation)\n", - "\n", - "Example of trees:\n", - "\n", - "- Binary trees, binary search trees, N-ary trees, recursive call trees, etc.\n", - "\n", - "- HOB (Horizontally Ordered Binary), AVL (Adelson-Velskii and Landis, self-balancing trees), ...\n", - "\n", - "- B-trees, forests, lattices, etc.\n" - ] - }, - { - "cell_type": "markdown", - "id": "e35608be", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Definitions on trees\n", - "\n", - "(similar to the ones for the binary trees)\n", - "\n", - "`Nodes` - a tree is composed of nodes that contain a `value` and `children`.\n", - "\n", - "`Edges` - are the connections between nodes; nodes may contain a value.\n", - "\n", - "`Root` - the topmost node in a tree; there can only be one root.\n", - "\n", - "`Parent and child` - each node has a single parent and up to two children.\n", - "\n", - "`Leaf` - no node below that node.\n", - "\n", - "`Depth` - the number of edges on the path from the root to that node.\n", - "\n", - "`Height` - maximum depth in a tree." - ] - }, - { - "cell_type": "markdown", - "id": "67f767cf", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Definitions on trees (cont.)\n", - "\n", - "`N-ary Tree`- a tree in which each node can have up to $N$ children. 
Binary trees is the case where $N = 2$.\n", - "\n", - "`Weight` - a quantity is associated to the edges.\n", - "\n", - "`Degree` - the number of child nodes it has. Binary tree is the case where degree is 2.\n", - "\n", - "`Subtree` - a portion of a tree that is itself a tree.\n", - "\n", - "`Forest` - a collection of trees not connected to each other." - ] - }, - { - "cell_type": "markdown", - "id": "bb8e8697", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures (dicts + lists)\n", - "\n", - "A simple way is the adjacency list using a dictionnary `dict` type." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "53891826", - "metadata": {}, - "outputs": [], - "source": [ - "tree_dict = {\n", - " \"a\": [\"b\", \"c\"],\n", - " \"b\": [\"d\", \"e\"],\n", - " \"c\": [\"f\"],\n", - " \"d\": [],\n", - " \"e\": [],\n", - " \"f\": []\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "cf1c0607", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", - " -->\n", - "<!-- Pages: 1 -->\n", - "<svg width=\"206pt\" height=\"188pt\"\n", - " viewBox=\"0.00 0.00 206.00 188.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-184 202,-184 202,4 -4,4\"/>\n", - "<!-- a -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>a</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"135\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"135\" y=\"-158.3\" font-family=\"Times,serif\" 
font-size=\"14.00\">a</text>\n", - "</g>\n", - "<!-- b -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>b</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">b</text>\n", - "</g>\n", - "<!-- a->b -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>a->b</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M126.65,-144.76C122.42,-136.55 117.19,-126.37 112.42,-117.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"115.68,-115.79 108,-108.49 109.46,-118.99 115.68,-115.79\"/>\n", - "</g>\n", - "<!-- c -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>c</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"171\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">c</text>\n", - "</g>\n", - "<!-- a->c -->\n", - "<g id=\"edge2\" class=\"edge\">\n", - "<title>a->c</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M143.35,-144.76C147.58,-136.55 152.81,-126.37 157.58,-117.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"160.54,-118.99 162,-108.49 154.32,-115.79 160.54,-118.99\"/>\n", - "</g>\n", - "<!-- d -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>d</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">d</text>\n", - "</g>\n", - "<!-- b->d -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>b->d</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M84.08,-74.5C74.23,-64.92 61.14,-52.19 49.97,-41.34\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"52.59,-39 42.98,-34.54 47.71,-44.02 52.59,-39\"/>\n", - "</g>\n", - "<!-- e -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>e</title>\n", - 
"<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">e</text>\n", - "</g>\n", - "<!-- b->e -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>b->e</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M99,-71.7C99,-64.41 99,-55.73 99,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"102.5,-47.62 99,-37.62 95.5,-47.62 102.5,-47.62\"/>\n", - "</g>\n", - "<!-- f -->\n", - "<g id=\"node6\" class=\"node\">\n", - "<title>f</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"171\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">f</text>\n", - "</g>\n", - "<!-- c->f -->\n", - "<g id=\"edge5\" class=\"edge\">\n", - "<title>c->f</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M171,-71.7C171,-64.41 171,-55.73 171,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"174.5,-47.62 171,-37.62 167.5,-47.62 174.5,-47.62\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x103f43d00>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "draw_tree_dict(tree_dict)" - ] - }, - { - "cell_type": "markdown", - "id": "4f833d5f", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures (dicts + named lists)\n", - "\n", - "- A variation is to use a named variable for the list." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "15a30278", - "metadata": {}, - "outputs": [], - "source": [ - "tree_dict_name = {\n", - " \"a\": {\"neighbors\": [\"b\", \"c\"]},\n", - " \"b\": {\"neighbors\": [\"d\", \"e\"]},\n", - " \"c\": {\"neighbors\": [\"f\"]},\n", - " \"d\": {\"neighbors\": []},\n", - " \"e\": {\"neighbors\": []},\n", - " \"f\": {\"neighbors\": []}\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "bf6182a0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['b', 'c']" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tree_dict_name[\"a\"][\"neighbors\"]" - ] - }, - { - "cell_type": "markdown", - "id": "c3c23285", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures (sets)\n", - "\n", - "- The children are unique and not ordered" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "d996b53e", - "metadata": {}, - "outputs": [], - "source": [ - "tree_set = {\n", - " \"a\": set([\"b\", \"c\"]),\n", - " \"b\": set([\"d\", \"e\"]),\n", - " \"c\": set([\"f\"]),\n", - " \"d\": set(),\n", - " \"e\": set(),\n", - " \"f\": set()\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "735ef0c3", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures (lists of lists)\n", - "\n", - "- Each node is an entry in the list\n", - "- Children are sub-lists" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "ed1ae4b2", - "metadata": {}, - "outputs": [], - "source": [ - "tree_list = [\n", - " ['a', ['b', 'c']],\n", - " ['b', ['d', 'e']],\n", - " ['c', ['f', 'g']],\n", - " ['d', []],\n", - " ['e', []],\n", - " ['f', []],\n", - " ['g', []] \n", - "]" - ] - }, - { - "cell_type": "markdown", - "id": "ec31a4a3", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## 
class Node:
    """A node of an N-ary tree: a value plus an ordered list of child nodes."""

    def __init__(self, value, children=None):
        """Create a node.

        Args:
            value: payload stored in the node.
            children: optional list of child ``Node`` objects. The list is
                stored as given (not copied). When omitted, the node gets
                its own fresh empty list.
        """
        self.value = value
        # A literal ``[]`` default is evaluated once, at definition time, and
        # would be shared by every node created without children: appending
        # a child to one leaf would silently add it to all the others.
        self.children = [] if children is None else children

    def get_all_nodes(self):
        """Return the values of this subtree in pre-order (recursive version)."""
        nodes = [self.value]
        for child in self.children:
            nodes += child.get_all_nodes()
        return nodes

    def get_all_nodes_iterative(self):
        """Return the values of this subtree using an explicit stack.

        Children are pushed in order and popped from the end of the stack,
        so siblings come out right to left (the order differs from
        ``get_all_nodes``).
        """
        nodes = []
        stack = [self]
        while stack:
            current_node = stack.pop()
            nodes.append(current_node.value)
            stack += current_node.children
        return nodes
- "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [], - "source": [ - "root = Node(\"a\", [\n", - " Node(\"b\", [\n", - " Node(\"d\"),\n", - " Node(\"e\"),\n", - " ]),\n", - " Node(\"c\", [\n", - " Node(\"f\"),\n", - " ]),\n", - "])\n", - "\n", - "# or using \"root.children\"" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "3ac188f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['a', 'b', 'd', 'e', 'c', 'f']" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "root.get_all_nodes()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "bf8772d3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['a', 'c', 'f', 'b', 'e', 'd']" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "root.get_all_nodes_iterative()" - ] - }, - { - "cell_type": "markdown", - "id": "f8aefc1e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Weighted trees\n", - "\n", - "> Trees with a quantity associated to the links or the nodes\n", - "\n", - "- Useful to quantifie both nodes and links\n", - "- Storing those values require additionnal data structures" - ] - }, - { - "cell_type": "markdown", - "id": "b66a3424", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures for weighted trees (dicts for edges)\n", - "\n", - "- We need to add an extra value to encode values in edges " - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "f26343c9", - "metadata": {}, - "outputs": [], - "source": [ - "tree_w_dict = {'a': [{'b': 0}, {'c': 0}],\n", - " 'b': [{'d': 0}, {'e': 0}],\n", - " 'c': [{'f': 0}],\n", - " 'd': [],\n", - " 'e': []\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "a50608c9", - "metadata": {}, - "outputs": [], - 
"source": [ - "tree_w_tuple = {\n", - " 'a': [('b', 0), ('c', 0)],\n", - " 'b': [('d', 0), ('e', 0)],\n", - " 'c': [('f', 0)],\n", - " 'd': [],\n", - " 'e': []\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "e5f7dfb7", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Weigthted trees as classes" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "a81e7954", - "metadata": {}, - "outputs": [], - "source": [ - "class Node_weight:\n", - " def __init__(self, data, weight=0):\n", - " self.data = data\n", - " self.children = []\n", - " self.weight = weight\n", - "\n", - " \n", - "tree = Node_weight(1)\n", - "child1 = Node_weight(2, weight=5)\n", - "child2 = Node_weight(3, weight=7)\n", - "tree.children = [child1, child2]" - ] - }, - { - "cell_type": "markdown", - "id": "50918649", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Exercise: Calculate the total weight of a tree\n", - "\n", - "_Tip: go through all the nodes and get the edges, then sum their weights._" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "35793e38", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def get_tree_edges(root):\n", - " edges = []\n", - " stack = [(root, None)]\n", - "\n", - " while stack:\n", - " node, parent_data = stack.pop()\n", - " \n", - " for child in node.children:\n", - " stack.append((child, node.data))\n", - " edges.append((node.data, child.data, child.weight))\n", - "\n", - " return edges" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "8d143474", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(1, 2, 5), (1, 3, 7)]" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tree_w_oo = Node_weight(1)\n", - "child1 = Node_weight(2, weight=5)\n", - "child2 = Node_weight(3, weight=7)\n", - 
"tree_w_oo.children = [child1, child2]\n", - "get_tree_edges(tree_w_oo)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "864d9a68", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "12" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sum(tpl[2] for tpl in get_tree_edges(tree_w_oo))" - ] - }, - { - "cell_type": "markdown", - "id": "cad8ffb6", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Exercise: Calculate the total weight of a tree\n", - "\n", - "A recursive version:" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "36b7cce1", - "metadata": {}, - "outputs": [], - "source": [ - "def calculate_total_weight(node):\n", - " total_weight = node.weight\n", - " for child in node.children:\n", - " total_weight += calculate_total_weight(child)\n", - " return total_weight" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "a3371c5a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "12" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "calculate_total_weight(tree_w_oo)" - ] - }, - { - "cell_type": "markdown", - "id": "df3b5ca2", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## An Edge class for edges\n", - "\n", - "- To consider edges as objects\n", - "- Can be used as a complement of the nodes (or without the nodes)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "f8094971", - "metadata": {}, - "outputs": [], - "source": [ - "class Edge:\n", - " def __init__(self, source, target):\n", - " self.source = source\n", - " self.target = target\n", - "\n", - "class Node:\n", - " def __init__(self, label):\n", - " self.label = label\n", - " self.children = []\n", - "\n", - "class Tree:\n", - " def __init__(self, root_label):\n", - " self.root = 
Node(root_label)\n", - " self.edges = []" - ] - }, - { - "cell_type": "markdown", - "id": "659ca77e", - "metadata": {}, - "source": [ - "## Main trees properties\n", - "\n", - "- Hierarchical structure\n", - "- No cycle\n", - "- All nodes connected\n", - "\n", - "We will mostly use one of the two traversal methods (BFS and DFS) to achieve this.\n", - "\n", - "Also we will using the dictionnary-based data structure:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d45a5bb6", - "metadata": {}, - "outputs": [], - "source": [ - "tree = {\n", - " \"A\": [\"B\", \"C\"],\n", - " \"B\": [\"D\", \"E\"],\n", - " \"C\": [\"F\", \"G\"],\n", - " \"D\": [\"H\", \"I\"],\n", - " \"E\": [\"J\"],\n", - " \"F\": [],\n", - " \"G\": [],\n", - " \"H\": [],\n", - " \"I\": [],\n", - " \"J\": []\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2737d1a8", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", - "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", - " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", - "<!-- Generated by graphviz version 7.1.0 (20230121.1956)\n", - " -->\n", - "<!-- Pages: 1 -->\n", - "<svg width=\"350pt\" height=\"260pt\"\n", - " viewBox=\"0.00 0.00 350.00 260.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", - "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 256)\">\n", - "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-256 346,-256 346,4 -4,4\"/>\n", - "<!-- A -->\n", - "<g id=\"node1\" class=\"node\">\n", - "<title>A</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"207\" cy=\"-234\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"207\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">A</text>\n", - "</g>\n", - "<!-- B -->\n", - "<g id=\"node2\" class=\"node\">\n", - "<title>B</title>\n", - 
"<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"171\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">B</text>\n", - "</g>\n", - "<!-- A->B -->\n", - "<g id=\"edge1\" class=\"edge\">\n", - "<title>A->B</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M198.65,-216.76C194.42,-208.55 189.19,-198.37 184.42,-189.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"187.68,-187.79 180,-180.49 181.46,-190.99 187.68,-187.79\"/>\n", - "</g>\n", - "<!-- C -->\n", - "<g id=\"node3\" class=\"node\">\n", - "<title>C</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"243\" cy=\"-162\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"243\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">C</text>\n", - "</g>\n", - "<!-- A->C -->\n", - "<g id=\"edge2\" class=\"edge\">\n", - "<title>A->C</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M215.35,-216.76C219.58,-208.55 224.81,-198.37 229.58,-189.09\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"232.54,-190.99 234,-180.49 226.32,-187.79 232.54,-190.99\"/>\n", - "</g>\n", - "<!-- D -->\n", - "<g id=\"node4\" class=\"node\">\n", - "<title>D</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">D</text>\n", - "</g>\n", - "<!-- B->D -->\n", - "<g id=\"edge3\" class=\"edge\">\n", - "<title>B->D</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M156.08,-146.5C146.23,-136.92 133.14,-124.19 121.97,-113.34\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"124.59,-111 114.98,-106.54 119.71,-116.02 124.59,-111\"/>\n", - "</g>\n", - "<!-- E -->\n", - "<g id=\"node5\" class=\"node\">\n", - "<title>E</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text 
text-anchor=\"middle\" x=\"171\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">E</text>\n", - "</g>\n", - "<!-- B->E -->\n", - "<g id=\"edge4\" class=\"edge\">\n", - "<title>B->E</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M171,-143.7C171,-136.41 171,-127.73 171,-119.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"174.5,-119.62 171,-109.62 167.5,-119.62 174.5,-119.62\"/>\n", - "</g>\n", - "<!-- F -->\n", - "<g id=\"node6\" class=\"node\">\n", - "<title>F</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"243\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"243\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">F</text>\n", - "</g>\n", - "<!-- C->F -->\n", - "<g id=\"edge5\" class=\"edge\">\n", - "<title>C->F</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M243,-143.7C243,-136.41 243,-127.73 243,-119.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"246.5,-119.62 243,-109.62 239.5,-119.62 246.5,-119.62\"/>\n", - "</g>\n", - "<!-- G -->\n", - "<g id=\"node7\" class=\"node\">\n", - "<title>G</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"315\" cy=\"-90\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"315\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">G</text>\n", - "</g>\n", - "<!-- C->G -->\n", - "<g id=\"edge6\" class=\"edge\">\n", - "<title>C->G</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M257.92,-146.5C267.77,-136.92 280.86,-124.19 292.03,-113.34\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"294.29,-116.02 299.02,-106.54 289.41,-111 294.29,-116.02\"/>\n", - "</g>\n", - "<!-- H -->\n", - "<g id=\"node8\" class=\"node\">\n", - "<title>H</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">H</text>\n", - "</g>\n", - "<!-- D->H -->\n", - "<g 
id=\"edge7\" class=\"edge\">\n", - "<title>D->H</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M84.08,-74.5C74.23,-64.92 61.14,-52.19 49.97,-41.34\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"52.59,-39 42.98,-34.54 47.71,-44.02 52.59,-39\"/>\n", - "</g>\n", - "<!-- I -->\n", - "<g id=\"node9\" class=\"node\">\n", - "<title>I</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"99\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"99\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">I</text>\n", - "</g>\n", - "<!-- D->I -->\n", - "<g id=\"edge8\" class=\"edge\">\n", - "<title>D->I</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M99,-71.7C99,-64.41 99,-55.73 99,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"102.5,-47.62 99,-37.62 95.5,-47.62 102.5,-47.62\"/>\n", - "</g>\n", - "<!-- J -->\n", - "<g id=\"node10\" class=\"node\">\n", - "<title>J</title>\n", - "<ellipse fill=\"none\" stroke=\"black\" cx=\"171\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\n", - "<text text-anchor=\"middle\" x=\"171\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">J</text>\n", - "</g>\n", - "<!-- E->J -->\n", - "<g id=\"edge9\" class=\"edge\">\n", - "<title>E->J</title>\n", - "<path fill=\"none\" stroke=\"black\" d=\"M171,-71.7C171,-64.41 171,-55.73 171,-47.54\"/>\n", - "<polygon fill=\"black\" stroke=\"black\" points=\"174.5,-47.62 171,-37.62 167.5,-47.62 174.5,-47.62\"/>\n", - "</g>\n", - "</g>\n", - "</svg>\n" - ], - "text/plain": [ - "<graphviz.graphs.Digraph at 0x10a59bcd0>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "draw_tree_dict(tree)" - ] - }, - { - "cell_type": "markdown", - "id": "121006f2", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Generalized BFS (Breadth-First Search)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ba62dbbc", - "metadata": {}, - "outputs": [], - 
"source": [ - "def bfs(tree, start_node):\n", - " queue = [start_node]\n", - " result = []\n", - "\n", - " while queue:\n", - " node = queue.pop(0)\n", - " result.append(node)\n", - " children = tree.get(node, [])\n", - "\n", - " for child in children:\n", - " if child is not None:\n", - " queue.append(child)\n", - "\n", - " return result" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "5ec0e5f9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']\n" - ] - } - ], - "source": [ - "print(bfs(tree, \"A\"))" - ] - }, - { - "cell_type": "markdown", - "id": "18c6b5e6", - "metadata": {}, - "source": [ - "## Generalized DFS (Depth-First Search)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0968d63f", - "metadata": {}, - "outputs": [], - "source": [ - "def dfs(tree, start_node):\n", - " stack = [start_node]\n", - " result = []\n", - "\n", - " while stack:\n", - " node = stack.pop()\n", - " result.append(node)\n", - " children = tree.get(node, [])\n", - "\n", - " for child in children:\n", - " if child is not None:\n", - " stack.append(child)\n", - "\n", - " return result\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c3f173c", - "metadata": {}, - "outputs": [], - "source": [ - "print(dfs(tree, \"A\"))" - ] - }, - { - "cell_type": "markdown", - "id": "6c9a2f9f", - "metadata": {}, - "source": [ - "## Tree property: are all nodes connected?\n", - "\n", - "Without having a first node and re-using the dfs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "405e887d", - "metadata": {}, - "outputs": [], - "source": [ - "def is_tree_connected(tree, start_node):\n", - " if not tree:\n", - " return True # An empty tree is considered connected.\n", - "\n", - " visited = set()\n", - " stack = []\n", - "\n", - " stack.append(start_node)\n", - "\n", - " while stack:\n", - " node = 
stack.pop()\n", - " if node not in visited:\n", - " visited.add(node)\n", - " stack.extend(tree.get(node, []))\n", - "\n", - " return len(visited) == len(tree)\n", - "\n", - "\n", - "is_tree_connected(tree, \"A\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5852f357", - "metadata": {}, - "outputs": [], - "source": [ - "dfs_check_connected(tree, \"A\")" - ] - }, - { - "cell_type": "markdown", - "id": "f76000b9", - "metadata": {}, - "source": [ - "## Tree property: does the tree have a cycle?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a1907c6", - "metadata": {}, - "outputs": [], - "source": [ - "def has_cycle_dfs(root):\n", - " def dfs(node, parent, visited):\n", - " if node in visited:\n", - " if parent is not None and parent != visited[node]:\n", - " return True\n", - " return False\n", - "\n", - " visited[node] = parent\n", - "\n", - " for child in node.children:\n", - " if dfs(child, node, visited):\n", - " return True\n", - "\n", - " return False\n", - "\n", - " visited = {}\n", - " return dfs(root, None, visited)" - ] - }, - { - "cell_type": "markdown", - "id": "ffb17bd1", - "metadata": {}, - "source": [ - "## What if we add an extra node \"K\"?\n", - "\n", - "> tree[\"F\"] = [\"A\"]" - ] - }, - { - "cell_type": "markdown", - "id": "408393f0", - "metadata": {}, - "source": [ - "## Tree property: Check if the tree is an n-ary tree" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91ef03ea", - "metadata": {}, - "outputs": [], - "source": [ - "def is_binary_tree(tree, node, n = 2, visited=None):\n", - " if visited is None:\n", - " visited = set()\n", - "\n", - " if node in visited:\n", - " return True\n", - "\n", - " visited.add(node)\n", - " children = tree.get(node, [])\n", - "\n", - " if len(children) > n:\n", - " return False\n", - "\n", - " for child in children:\n", - " if not is_binary_tree(tree, child, n, visited):\n", - " return False\n", - "\n", - " return True\n", - "\n", 
def generate_edges(graph):
    """List every ``(node, neighbor)`` pair of an adjacency-list dict.

    Pairs follow the iteration order of the dict, then the order of each
    neighbor collection.
    """
    return [
        (node, neighbor)
        for node, neighbors in graph.items()
        for neighbor in neighbors
    ]


def generate_edges_dfs(graph, start_node):
    """Collect the edges followed by a depth-first walk from ``start_node``.

    Args:
        graph: dict mapping a node to its neighbors. Nodes that are not
            keys of the dict are treated as having no neighbors.
        start_node: node the walk starts from.

    Returns:
        A list of ``(node, neighbor)`` tuples, one for each time the walk
        moves from an expanded node to a neighbor that has not been expanded
        yet.
    """
    edges = []
    stack = [start_node]
    visited = set()  # set membership is O(1); a list would be O(n) per test

    while stack:
        node = stack.pop()
        visited.add(node)
        # .get keeps this consistent with bfs/dfs, which accept dicts whose
        # leaves are not listed as keys.
        for neighbor in graph.get(node, []):
            if neighbor not in visited:
                edges.append((node, neighbor))
                stack.append(neighbor)

    return edges
- "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff0ba08e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures-notebooks/11-graphs.ipynb b/lectures-notebooks/11-graphs.ipynb deleted file mode 100644 index fa910d207edce50f91032859227075510fc0747b..0000000000000000000000000000000000000000 --- a/lectures-notebooks/11-graphs.ipynb +++ /dev/null @@ -1,2137 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "09fc003e", - "metadata": {}, - "source": [ - "# UE5 Fundamentals of Algorithms\n", - "## Lecture 11: Graphs\n", - "### Ecole Centrale de Lyon, Bachelor of Science in Data Science for Responsible Business\n", - "#### Romain Vuillemot\n", - "<center><img src=\"figures/Logo_ECL.png\" style=\"width:300px\"></center>" - ] - }, - { - "cell_type": "markdown", - "id": "74743087", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "source": [ - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "ead453f3", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "from graphviz import Digraph\n", - "from IPython.display import display\n", - "from utils import visualize_graph_nx, visualize_graph_w" - ] - }, - { - "cell_type": "markdown", - "id": "f3ebe7d2", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Outline\n", - "\n", - "- Definitions\n", - "- Data structures\n", - "- Properties\n", - "- Weighted graphs and 
spanning trees\n", - "- Shortest paths" - ] - }, - { - "cell_type": "markdown", - "id": "a5c1dca1", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "# Graphs\n", - "\n", - "\n", - "> A **graph** is an abstract data structure consisting of a set of vertices connected by edges.\n", - "\n", - "- Trees are a specific case of a graph (acyclic, connected graphs)\n", - "\n", - "Examples:\n", - "\n", - "- Messaging: the traveling salesman problem, postal routes\n", - "- Communication networks\n", - "- Traffic management: flow problems, minimum congestion paths, ...\n", - "- Air navigation (aircraft in sky corridors!)\n", - "- Closed transportation system (closed circuit): goods delivery, TSP (Traveling Salesman Problem).\n", - "- Printed circuit board wiring" - ] - }, - { - "cell_type": "markdown", - "id": "30f450b7", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Definition\n", - "\n", - "\n", - "Graph $G = (V, E)$ with:\n", - "\n", - "- $V$: set of nodes (vertices).\n", - "- $E \\subseteq V \\times V$: set of edges (links) or arcs (if oriented).\n", - "\n", - "Properties:\n", - "\n", - "- **Connected graph**: with a path between any pair of nodes.\n", - "- **Directed graphs**: where edges have a specific direction.\n", - "- **Weighted graphs**: numerical values associated with nodes or edges.\n", - "- **Strongly connected graphs**: directed graphs where there is a path from any node to any other node.\n", - "- **Bipartite**: vertices can be divided into two sets with no edges within a set.\n", - "- **Dense graph**: with a high edge-to-vertex ratio, often with $|E| = O(|V|^2)$.\n", - "- **Path**: a sequence of nodes in which each consecutive pair is connected by an edge.\n", - "- **Cycle**: a path that starts and ends at the same vertex.\n", - "- **Degree**: number of edges connected to a node." 
- ] - }, - { - "cell_type": "markdown", - "id": "5cc0eab1", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures: dict\n", - "\n", - "- Using a dictionary as an adjacency list (the same representation as for trees, except that a graph may contain cycles and non-connected nodes)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "63e36dcf", - "metadata": {}, - "outputs": [], - "source": [ - "g = { \"a\" : [\"d\"],\n", - " \"b\" : [\"c\"],\n", - " \"c\" : [\"b\", \"c\", \"d\", \"e\"],\n", - " \"d\" : [\"a\", \"c\"],\n", - " \"e\" : [\"c\"], \n", - " \"f\" : [] \n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c0413aac", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['a', 'b', 'c', 'd', 'e', 'f'])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g.keys() # nodes" - ] - }, - { - "cell_type": "markdown", - "id": "b5995983", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures: dict" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2ecde2a7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('a', 'd'),\n", - " ('b', 'c'),\n", - " ('c', 'b'),\n", - " ('c', 'c'),\n", - " ('c', 'd'),\n", - " ('c', 'e'),\n", - " ('d', 'a'),\n", - " ('d', 'c'),\n", - " ('e', 'c')]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def generate_edges(graph):\n", - " edges = []\n", - " for node, neighbors in graph.items():\n", - " for neighbor in neighbors:\n", - " edges.append((node, neighbor))\n", - " return edges\n", - "\n", - "generate_edges(g) # edges" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f7d6c3d1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('a', 'd'),\n", - " ('b', 'c'),\n", - " ('c', 'b'),\n", - " ('c', 'c'),\n", - " ('c', 
'd'),\n", - " ('c', 'e'),\n", - " ('d', 'a'),\n", - " ('d', 'c'),\n", - " ('e', 'c')]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[(vertex, neighbor) for vertex, neighbors \n", - " in g.items() for neighbor in neighbors]" - ] - }, - { - "cell_type": "markdown", - "id": "b39566f5", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graphs: node-link representation" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "2e937dc7", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 640x480 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "visualize_graph_nx(g)" - ] - }, - { - "cell_type": "markdown", - "id": "e1996237", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS\n", - "\n", - "> Depth-First Search (DFS) starts exploring a graph at a source node and explores as far as possible along each branch before backtracking. \n", - "\n", - "- Similar to DFS on trees \n", - "- But it needs to remember visited nodes (a graph may contain cycles) \n", - "\n", - "Steps:\n", - "\n", - "1. Put the source node into the stack.\n", - "2. Remove the node at the top of the stack to process it.\n", - "3. Put all unexplored neighbors into the stack (at the top).\n", - "4. If the stack is not empty, go back to step 2." 
- ] - }, - { - "cell_type": "markdown", - "id": "9a8d7a69", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS with external visited list (iterative)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "7a863da7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a d c b e " - ] - } - ], - "source": [ - "def dfs(graph, start_node):\n", - " visited = set()\n", - " stack = [start_node]\n", - "\n", - " while stack:\n", - " node = stack.pop()\n", - " if node not in visited:\n", - " print(node, end=' ')\n", - " visited.add(node)\n", - " for neighbor in reversed(graph[node]):\n", - " if neighbor not in visited:\n", - " stack.append(neighbor)\n", - " \n", - "dfs(g, 'a') # start from node 'a'." - ] - }, - { - "cell_type": "markdown", - "id": "865e6d9e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS with external visited list (recursive)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9fe25ca1", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a d c b e " - ] - } - ], - "source": [ - "def dfs_rec(graph, start_node, visited=set()):\n", - " if start_node not in visited:\n", - " print(start_node, end=' ')\n", - " visited.add(start_node)\n", - " for neighbor in graph[start_node]:\n", - " if neighbor not in visited:\n", - " dfs_rec(graph, neighbor, visited)\n", - "\n", - "dfs_rec(g, 'a') # start from node 'a'." 
- ] - }, - { - "cell_type": "markdown", - "id": "8c628bee", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS with internal visited list (recursive)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "efeb1fa0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A B D E F C " - ] - } - ], - "source": [ - "def dfs(graph, start_node):\n", - " if start_node not in graph:\n", - " return\n", - "\n", - " print(start_node, end=' ')\n", - " graph[start_node]['visited'] = True\n", - "\n", - " for neighbor in graph[start_node]['neighbors']:\n", - " if not graph[neighbor]['visited']:\n", - " dfs(graph, neighbor)\n", - "\n", - "graph = {\n", - " 'A': {'neighbors': ['B', 'C'], 'visited': False},\n", - " 'B': {'neighbors': ['A', 'D', 'E'], 'visited': False},\n", - " 'C': {'neighbors': ['A', 'F'], 'visited': False},\n", - " 'D': {'neighbors': ['B'], 'visited': False},\n", - " 'E': {'neighbors': ['B', 'F'], 'visited': False},\n", - " 'F': {'neighbors': ['C', 'E'], 'visited': False}\n", - "}\n", - "\n", - "dfs(graph, 'A')" - ] - }, - { - "cell_type": "markdown", - "id": "2e6cee31", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## BFS\n", - "\n", - "> Breadth-First Search (BFS) starts exploring graphs at a source node, explores all of its neighbors at the current depth before moving on to nodes at the next depth level.\n", - "\n", - "- Similar to DFS, it also requires tracking visited nodes to avoid revisiting them.\n", - "\n", - "Steps for BFS:\n", - "\n", - "1. Put the source node into the queue.\n", - "2. Remove the node at the front of the queue to process it.\n", - "3. Explore all unvisited neighbors of the processed node and enqueue them at the back of the queue.\n", - "4. If the queue is not empty, go back to step 2." 
- ] - }, - { - "cell_type": "markdown", - "id": "67afb125", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## BFS with external visited list (iterative)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "99dcb888", - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A B C D E F " - ] - } - ], - "source": [ - "def bfs(graph, start_node):\n", - " visited = set()\n", - " queue = [start_node]\n", - "\n", - " while queue:\n", - " node = queue.pop(0)\n", - " if node not in visited:\n", - " print(node, end=' ')\n", - " visited.add(node)\n", - " for neighbor in graph.get(node, []):\n", - " if neighbor not in visited:\n", - " queue.append(neighbor)\n", - "\n", - "graph = {\n", - " 'A': ['B', 'C'],\n", - " 'B': ['A', 'D', 'E'],\n", - " 'C': ['A', 'F'],\n", - " 'D': ['B'],\n", - " 'E': ['B', 'F'],\n", - " 'F': ['C', 'E']\n", - "}\n", - "\n", - "bfs(graph, 'A')" - ] - }, - { - "cell_type": "markdown", - "id": "b48f3a6e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## BFS with backtracking\n", - "\n", - "To memorize the path used to visit nodes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "071787a7", - "metadata": {}, - "outputs": [], - "source": [ - "def bfs_with_backtracking(graph, start_node, seeked_node):\n", - " visited = {node: False for node in graph}\n", - " path = {node: None for node in graph}\n", - " queue = [start_node]\n", - " found = False\n", - "\n", - " while queue:\n", - " current_node = queue.pop(0)\n", - " visited[current_node] = True\n", - "\n", - " for neighbor in graph[current_node]:\n", - " if not visited[neighbor]:\n", - " queue.append(neighbor)\n", - " visited[neighbor] = True\n", - " path[neighbor] = current_node\n", - "\n", - " if neighbor == seeked_node:\n", - " found = True\n", - " break\n", - "\n", - " if found:\n", - " break\n", - "\n", - " if not found:\n", - " return \"Path not found\"\n", - "\n", - " node = seeked_node\n", - " path_sequence = []\n", - " while node is not None:\n", - " path_sequence.insert(0, node)\n", - " node = path[node]\n", - "\n", - " return path_sequence" - ] - }, - { - "cell_type": "markdown", - "id": "7376c14d", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## BFS with backtracking\n", - "\n", - "Path re-construction from the BFS exploration:\n", - "\n", - "```python\n", - " if not found:\n", - " return \"Path not found\"\n", - "\n", - " node = seeked_node\n", - " path_sequence = []\n", - " while node is not None:\n", - " path_sequence.insert(0, node)\n", - " node = path[node]\n", - "\n", - " return path_sequence\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "ebc07421", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Path from A to F: ['A', 'C', 'F']\n" - ] - } - ], - "source": [ - "graph = {\n", - " 'A': ['B', 'C'],\n", - " 'B': ['A', 'D', 'E'],\n", - " 'C': ['A', 'F'],\n", - " 'D': ['B'],\n", - " 'E': ['B', 'F'],\n", - " 'F': ['C', 'E']\n", - "}\n", 
- "\n", - "start_node = 'A'\n", - "seeked_node = 'F'\n", - "path = bfs_with_backtracking(graph, start_node, seeked_node)\n", - "print(f\"Path from {start_node} to {seeked_node}: {path}\")" - ] - }, - { - "cell_type": "markdown", - "id": "12bd1417", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: path between two nodes?\n", - "\n", - "INPUT: a list of edges" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "4eae7ecd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There is a path between 0 and 5: True\n" - ] - } - ], - "source": [ - "def has_path(edges, n, start, end):\n", - " voisins = [[] for i in range(n)]\n", - " for i, j in edges:\n", - " voisins[i].append(j)\n", - " voisins[j].append(i)\n", - "\n", - " stack = [start]\n", - " visited = set(stack)\n", - " while stack:\n", - " cur = stack.pop()\n", - " if cur == end:\n", - " return True\n", - " for v in voisins[cur]:\n", - " if v not in visited:\n", - " stack.append(v)\n", - " visited.add(v)\n", - " return False\n", - "\n", - "edges = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 5)]\n", - "num_nodes = 6 # number of unique nodes\n", - "start_node = 0; end_node = 5\n", - "result = has_path(edges, num_nodes, start_node, end_node)\n", - "print(f\"There is a path between {start_node} and {end_node}: {result}\")" - ] - }, - { - "cell_type": "markdown", - "id": "0f3d7abc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structures: OOP\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "5f5bbbc5", - "metadata": {}, - "outputs": [], - "source": [ - "class Graph:\n", - " def __init__(self):\n", - " self.graph = {}\n", - "\n", - " def add_vertex(self, vertex):\n", - " if vertex not in self.graph:\n", - " self.graph[vertex] = []\n", - "\n", - " def add_edge(self, vertex1, vertex2):\n", - " if vertex1 in self.graph and vertex2 in 
self.graph:\n", - " self.graph[vertex1].append(vertex2)\n", - " self.graph[vertex2].append(vertex1) \n", - "\n", - " def get_nodes(self):\n", - " return list(self.graph.keys())\n", - "\n", - " def get_edges(self):\n", - " edges = []\n", - " for vertex, neighbors in self.graph.items():\n", - " for neighbor in neighbors:\n", - " if (vertex, neighbor) not in edges and (neighbor, vertex) not in edges:\n", - " edges.append((vertex, neighbor))\n", - " return edges\n", - "\n", - " def __str__(self):\n", - " return str(self.graph)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "b8565be4", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nodes: ['a', 'b', 'c', 'd', 'e', 'f']\n", - "Edges: [('a', 'd'), ('b', 'c'), ('c', 'c'), ('c', 'd'), ('c', 'e')]\n" - ] - } - ], - "source": [ - "g_obj = Graph()\n", - "\n", - "for vertex in [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\"]:\n", - " g_obj.add_vertex(vertex)\n", - "\n", - "# Add edges\n", - "g_obj.add_edge(\"a\", \"d\")\n", - "g_obj.add_edge(\"b\", \"c\")\n", - "g_obj.add_edge(\"c\", \"b\")\n", - "g_obj.add_edge(\"c\", \"c\")\n", - "g_obj.add_edge(\"c\", \"d\")\n", - "g_obj.add_edge(\"c\", \"e\")\n", - "g_obj.add_edge(\"d\", \"a\")\n", - "g_obj.add_edge(\"d\", \"c\")\n", - "g_obj.add_edge(\"e\", \"c\")\n", - "\n", - "\n", - "print(\"Nodes:\", g_obj.get_nodes())\n", - "g_obj.get_edges() == generate_edges(g)\n", - "print(\"Edges:\", g_obj.get_edges())" - ] - }, - { - "cell_type": "markdown", - "id": "f33ed24b", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS using OOP\n", - "\n", - "(Only explores a single connected component)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "ac540bec", - "metadata": {}, - "outputs": [], - "source": [ - "def dfs(self, start_vertex, visited = set()):\n", - " stack = [start_vertex]\n", - "\n", - " while stack:\n", - " 
vertex = stack.pop()\n", - " if vertex not in visited:\n", - " print(vertex, end=' ')\n", - " visited.add(vertex)\n", - " neighbors = self.graph[vertex]\n", - " for neighbor in neighbors:\n", - " if neighbor not in visited:\n", - " stack.append(neighbor)\n", - "\n", - "Graph.dfs = dfs # update the Graph class" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "aec8122f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a d c e b " - ] - } - ], - "source": [ - "g_obj.dfs(\"a\")" - ] - }, - { - "cell_type": "markdown", - "id": "c5360edc", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## DFS using oop\n", - "\n", - "Explores all the graph components" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "85b84e94", - "metadata": {}, - "outputs": [], - "source": [ - "def components(self):\n", - " visited = set()\n", - "\n", - " for vertex in self.graph:\n", - " if vertex not in visited:\n", - " self.dfs(vertex, visited)\n", - " print()\n", - "Graph.components = components # update the Graph class" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "643b438b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a d c e b \n", - "f \n" - ] - } - ], - "source": [ - "g_obj.components()" - ] - }, - { - "cell_type": "markdown", - "id": "09e71c1e", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: can a graph be n-colored?\n", - "\n", - "Two adjacent vertices (connected by an edge) cannot have the same color when properly colored. Example with $n = 2$ (i.e. can a graph be colored with 2 colors)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "0395ed9d", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "class Node:\n", - " def __init__(self, v = None, n = []):\n", - " self.v = v\n", - " self.n = n\n", - " self.visited = False\n", - "\n", - "def two_color(r):\n", - " \n", - " stack = [r]\n", - " \n", - " while len(stack) > 0:\n", - " c = stack.pop(0)\n", - " for n in c.n:\n", - " if(c.v == n.v): # neighbours have same color\n", - " return False\n", - " if not n.visited:\n", - " stack.append(n)\n", - " n.visited = True \n", - "\n", - " return True" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "8fda865c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n" - ] - } - ], - "source": [ - "n1 = Node(\"gray\")\n", - "n2 = Node(\"black\")\n", - "n3 = Node(\"gray\")\n", - "n4 = Node(\"gray\")\n", - "n5 = Node(\"black\")\n", - "n6 = Node(\"gray\")\n", - "\n", - "n1.n = [n2]\n", - "n2.n = [n1, n3, n4]\n", - "n3.n = [n2, n5]\n", - "n4.n = [n2, n5]\n", - "n5.n = [n3, n4, n6]\n", - "n6.n = [n5]\n", - "\n", - "print(two_color(n1)) " - ] - }, - { - "cell_type": "markdown", - "id": "d6584072", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Data structure: Adjacency matrix\n", - "\n", - "- Square: it has the same number of rows and columns.\n", - "- A 1 in a cell $m_{ij}$ indicates a link between nodes `i` and `j`.\n", - "- A 1 on the diagonal would indicate a loop.\n", - "- It is symmetric: $m_{ij} = m_{ji}$ for an undirected graph.\n", - "- For valued graphs, cells contain values (instead of `1`).\n" - ] - }, - { - "cell_type": "markdown", - "id": "95501f5c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Adjacency matrix (example)\n", - "\n", - "Given the graph `G`, what is its 
corresponding adjacency matrix?\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "e045bef6", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 0, 0, 1, 0, 0]\n", - "[0, 0, 1, 0, 0, 0]\n", - "[0, 1, 1, 1, 1, 0]\n", - "[1, 0, 1, 0, 0, 0]\n", - "[0, 0, 1, 0, 0, 0]\n", - "[0, 0, 0, 0, 0, 0]\n" - ] - } - ], - "source": [ - "nodes = sorted(g.keys())\n", - "num_nodes = len(nodes)\n", - "adj_matrix = [[0] * num_nodes for _ in range(num_nodes)]\n", - "xf\n", - "for i, node in enumerate(nodes):\n", - " connected_nodes = g[node]\n", - " for connected_node in connected_nodes:\n", - " j = nodes.index(connected_node)\n", - " adj_matrix[i][j] = 1\n", - "\n", - "for row in adj_matrix:\n", - " print(row)" - ] - }, - { - "cell_type": "markdown", - "id": "99ade3d5", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Adjacency matrix (OOP)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "c1419547", - "metadata": {}, - "outputs": [], - "source": [ - "class GraphAdj:\n", - "\n", - " def __init__(self, n):\n", - " self.__n = n\n", - " self.__g = [[0 for _ in range(n)] for _ in range(n)]\n", - "\n", - " for i in range(0, self.__n):\n", - " for j in range(0, self.__n):\n", - " self.__g[i][j] = 0\n", - "\n", - "\n", - " def addEdge(self, x, y):\n", - " if (x < 0) or (x >= self.__n):\n", - " print(\"Vertex {} does not exist!\".format(x))\n", - " if (y < 0) or (y >= self.__n):\n", - " print(\"Vertex {} does not exist!\".format(y))\n", - "\n", - " if(x == y):\n", - " print(\"Same Vertex!\")\n", - " else:\n", - " self.__g[y][x] = 1\n", - " self.__g[x][y] = 1\n", - "\n", - " def displayAdjacencyMatrix(self):\n", - " for i in range(0, self.__n):\n", - " print()\n", - " for j in range(0, self.__n):\n", - " print(\"\", self.__g[i][j], end = \"\")\n", - "\n", - " def removeEdge(self, x, y):\n", - " if (x < 0) or (x 
>= self.__n):\n", - " print(\"Vertex {} does not exist!\".format(x))\n", - " if (y < 0) or (y >= self.__n):\n", - " print(\"Vertex {} does not exist!\".format(y))\n", - " if(x == y):\n", - " print(\"Same Vertex!\")\n", - " else:\n", - " self.__g[y][x] = 0\n", - " self.__g[x][y] = 0" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "079f999c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " 0 1 1 1 1 0\n", - " 1 0 0 1 0 0\n", - " 1 0 0 1 1 1\n", - " 1 1 1 0 0 1\n", - " 1 0 1 0 0 0\n", - " 0 0 1 1 0 0" - ] - } - ], - "source": [ - "obj = GraphAdj(6)\n", - "\n", - "obj.addEdge(0, 1)\n", - "obj.addEdge(0, 2)\n", - "obj.addEdge(0, 3)\n", - "obj.addEdge(0, 4)\n", - "obj.addEdge(1, 3)\n", - "obj.addEdge(2, 3)\n", - "obj.addEdge(2, 4)\n", - "obj.addEdge(2, 5)\n", - "obj.addEdge(3, 5)\n", - "\n", - "obj.displayAdjacencyMatrix()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "ecf8ed5a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " 0 1 1 1 1 0\n", - " 1 0 0 1 0 0\n", - " 1 0 0 0 1 1\n", - " 1 1 0 0 0 1\n", - " 1 0 1 0 0 0\n", - " 0 0 1 1 0 0" - ] - } - ], - "source": [ - "obj.removeEdge(2, 3);\n", - "obj.displayAdjacencyMatrix();" - ] - }, - { - "cell_type": "markdown", - "id": "d6ec3132", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: is a graph connected? 
(matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "1542423b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The graph is connected: False\n" - ] - } - ], - "source": [ - "def is_connected(graph):\n", - " n = len(graph)\n", - " visited = [False] * n\n", - " stack = [0]\n", - " while stack:\n", - " node = stack.pop()\n", - " if not visited[node]:\n", - " visited[node] = True\n", - " for i in range(n):\n", - " if graph[node][i] == 1 and not visited[i]:\n", - " stack.append(i)\n", - " return visited.count(True) == len(graph)\n", - "\n", - "g_m = [\n", - " [0, 0, 0, 0, 0],\n", - " [0, 0, 1, 0, 0],\n", - " [0, 1, 0, 1, 0],\n", - " [0, 0, 1, 0, 1],\n", - " [0, 0, 0, 1, 0]\n", - "]\n", - "\n", - "# We do a DFS\n", - "is_graph_connected = is_connected(g_m)\n", - "print(f\"The graph is connected: {is_graph_connected}\")" - ] - }, - { - "cell_type": "markdown", - "id": "99e73122", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: how many connected components? 
(matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "a4809d88", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def dfs(adj_matrix, node, visited):\n", - " visited[node] = True\n", - " for neighbor, connected in enumerate(adj_matrix[node]):\n", - " if connected and not visited[neighbor]:\n", - " dfs(adj_matrix, neighbor, visited)\n", - "\n", - "def count_connected_components(adj_matrix):\n", - " num_nodes = len(adj_matrix)\n", - " visited = [False] * num_nodes\n", - " components = 0\n", - "\n", - " for i in range(num_nodes):\n", - " if not visited[i]:\n", - " dfs(adj_matrix, i, visited)\n", - " components += 1\n", - "\n", - " return components" - ] - }, - { - "cell_type": "markdown", - "id": "a886fa9c", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: is there a self-connected node? (matrix)\n", - "\n", - "I.e is there for instance a node A $\\rightarrow$ A" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "81106cd6", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "def has_ones_in_diagonal(matrix):\n", - " for i in range(len(matrix)):\n", - " if matrix[i][i] == 1:\n", - " return True\n", - " return False" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "70044448", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# We check if there is a `1` in the diagonal\n", - "has_ones_in_diagonal(g_m)" - ] - }, - { - "cell_type": "markdown", - "id": "083bc1ef", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: is a graph oriented? 
(matrix)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9e0860f6", - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "# check if the matrix is equal to its transpose.\n", - "def is_symmetric(matrix):\n", - " rows = len(matrix)\n", - " cols = len(matrix[0])\n", - "\n", - " for i in range(rows):\n", - " for j in range(cols):\n", - " if matrix[i][j] != matrix[j][i]:\n", - " return False\n", - " return True" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "0f6f774b", - "metadata": {}, - "outputs": [], - "source": [ - "g_empty = []\n", - "n = 5\n", - "for i in range(n):\n", - " row = []\n", - " for j in range(n):\n", - " row.append(0)\n", - " g_empty.append(row)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "0ae92b34", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 0, 0, 0, 0]\n", - "[0, 0, 0, 0, 0]\n", - "[0, 0, 0, 0, 0]\n", - "[0, 0, 0, 0, 0]\n", - "[0, 0, 0, 0, 0]\n" - ] - } - ], - "source": [ - "for r in g_empty:\n", - " print(r, end=\"\\n\")" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "3928df4f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "is_symmetric(g_empty)" - ] - }, - { - "cell_type": "markdown", - "id": "2be90e37", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Graph property: is a graph connected? (dict)\n", - "\n", - "We check if the dfs equals the number of nodes." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "93eb41a0", - "metadata": {}, - "outputs": [], - "source": [ - "def is_connected(graph):\n", - " if not graph:\n", - " return True\n", - "\n", - " visited = set()\n", - " start_node = list(graph.keys())[0]\n", - "\n", - " def dfs(node):\n", - " visited.add(node)\n", - " for neighbor in graph[node]:\n", - " if neighbor not in visited:\n", - " dfs(neighbor)\n", - "\n", - " dfs(start_node)\n", - "\n", - " return len(visited) == len(graph)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "b53901d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "is_connected(g)" - ] - }, - { - "cell_type": "markdown", - "id": "3c584a2a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Weighted graphs\n", - "\n", - "Graph with numerical values associated with nodes or edges.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "e6aae369", - "metadata": {}, - "outputs": [], - "source": [ - "graph_w = {\n", - " \"a\": [(\"d\", 1)],\n", - " \"b\": [(\"c\", 3)],\n", - " \"c\": [(\"a\", 1), (\"b\", 3), (\"d\", 1), (\"e\", 1)],\n", - " \"d\": [(\"a\", 1), (\"c\", 1)],\n", - " \"e\": [(\"c\", 1)],\n", - " \"f\": []\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "e6ffdf83", - "metadata": {}, - "outputs": [], - "source": [ - "def greedy_heuristic_shortest_path(graph, start, end):\n", - " current_node = start\n", - " visited = set()\n", - "\n", - " while current_node != end:\n", - " visited.add(current_node)\n", - " min_weight = float('inf')\n", - " next_node = None\n", - "\n", - " # Find the neighboring unvisited node with the smallest weight\n", - " for neighbor, weight in graph[current_node]:\n", - " if neighbor not in visited and weight < min_weight:\n", - " min_weight = 
weight\n", - " next_node = neighbor\n", - "\n", - " if next_node is None:\n", - " return float('inf') # No path found\n", - "\n", - " current_node = next_node\n", - "\n", - " return 0 # Path found from start to end" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "93ae7fa1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "greedy_heuristic_shortest_path(graph_w, \"a\", \"e\")" - ] - }, - { - "cell_type": "markdown", - "id": "729aa7e8", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Weighted graphs" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "c4f13ab0", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 640x480 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "visualize_graph_w(graph_w)" - ] - }, - { - "cell_type": "markdown", - "id": "0eb464fd", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Spanning Trees\n", - "\n", - "> A **Minimum Spanning Tree (MST)** of a graph is a subset of edges that connects all vertices while minimizing the total sum of the edge values.\n", - "\n", - "- If a graph has $N$ vertices, its MST (Minimum Spanning Tree) will have $N-1$ edges.\n", - "\n", - "- A graph can have multiple spanning trees, but the MST is the one with the lowest weight.\n", - "\n", - "- A tree has only one spanning tree: itself.\n", - "\n", - "\n", - "Question: What is the minimum spanning tree of this graph?" 
- ] - }, - { - "cell_type": "markdown", - "id": "de9abb12", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - " <img src=\"figures/spanning-tree.png\" style=\"height:10cm;\">" - ] - }, - { - "cell_type": "markdown", - "id": "3a8c8f2a", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - " <img src=\"figures/spanning-tree-sol-1.png\" style=\"height: 10cm;\">" - ] - }, - { - "cell_type": "markdown", - "id": "95fa19f1", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - " <img src=\"figures/spanning-tree-sol-2.png\" style=\"height: 10cm;\">" - ] - }, - { - "cell_type": "markdown", - "id": "1e7c93c1", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "## Weighted Graph MST finding: Prim's Algorithm\n", - "\n", - "\n", - "1. Start with an initial tree reduced to a single vertex of the graph.\n", - "\n", - "2. At each iteration, expand the tree by adding the free vertex (one not yet in the tree) that is connected to the tree by the edge with the smallest weight.\n", - "\n", - "3. Stop when the tree spans all the vertices of the graph.\n", - "\n", - "\n", - "Programming Strategy?" 
from heapq import heapify, heappop, heappush


def prim(graph):
    """Grow a spanning tree from the first vertex of ``graph`` using Prim's algorithm.

    Args:
        graph: adjacency mapping ``{vertex: [(neighbor, edge_weight), ...]}``.
            NOTE(review): for an undirected graph this presumably expects each
            edge to be listed from both endpoints -- confirm against the data
            the notebook passes in.

    Returns:
        A list of ``(vertex, weight)`` pairs in the order the vertices join the
        tree, where ``weight`` is the weight of the edge used to reach the
        vertex (``0`` for the start vertex). Only vertices reachable from the
        start vertex appear. An empty graph yields an empty list.
    """
    # An empty graph has no start vertex; the original indexing raised IndexError.
    if not graph:
        return []

    mst = []
    start_vertex = next(iter(graph))  # first key, without copying all the keys
    priority_queue = [(0, start_vertex)]
    visited = set()

    while priority_queue:
        weight, current_vertex = heappop(priority_queue)
        # A vertex may be pushed several times; only its cheapest entry counts.
        if current_vertex in visited:
            continue
        mst.append((current_vertex, weight))
        visited.add(current_vertex)
        # .get() tolerates vertices that only appear as a neighbour (no own key).
        for neighbor, edge_weight in graph.get(current_vertex, ()):
            if neighbor not in visited:
                heappush(priority_queue, (edge_weight, neighbor))
    return mst
def bfs_path(graph, start, end):
    """Search for a path from ``start`` to ``end``, expanding the cheapest pending node first.

    Args:
        graph: adjacency mapping ``{node: [(neighbor, edge_cost), ...]}``.
        start: node the search begins at.
        end: node to reach.

    Returns:
        The list of nodes from ``start`` to ``end`` for the first route found,
        ``[start]`` when both are the same node, or ``None`` when the frontier
        empties without reaching ``end``. The search stops as soon as ``end``
        shows up as a neighbour, so the route is not guaranteed to be the
        cheapest one.
    """
    if start == end:
        return [start]

    seen = set()
    # Each entry: (node, nodes walked before reaching it, accumulated cost).
    frontier = [(start, [], 0)]

    while frontier:
        # Cheapest pending entry; min() keeps the earliest-inserted one on ties.
        cheapest = min(range(len(frontier)), key=lambda i: frontier[i][2])
        node, trail, total = frontier.pop(cheapest)
        seen.add(node)
        walked = trail + [node]

        for successor, step_cost in graph[node]:
            if successor in seen:
                continue
            if successor == end:
                return walked + [successor]
            frontier.append((successor, walked, total + step_cost))

    return None
def bellman_ford(graph, src):
    """Compute single-source shortest distances with the Bellman-Ford algorithm.

    Args:
        graph: adjacency mapping ``{u: [(v, w), ...]}`` of directed edges
            ``u -> v`` with cost ``w``; costs may be negative.
        src: source vertex.

    Returns:
        A dict mapping every vertex (keys of ``graph``, every edge target, and
        ``src``) to its distance from ``src``; unreachable vertices keep
        ``float("inf")``. When a negative cycle reachable from ``src`` is
        detected, a message is printed and ``None`` is returned.
    """
    infinity = float("inf")
    dist = {node: infinity for node in graph}
    # Register vertices that only appear as edge targets; the original raised
    # KeyError on dist[v] for them and undercounted the number of vertices.
    for edges in graph.values():
        for v, _ in edges:
            dist.setdefault(v, infinity)
    dist[src] = 0

    # At most |V| - 1 relaxation passes are needed.
    for _ in range(len(dist) - 1):
        changed = False
        for u in graph:
            if dist[u] == infinity:
                continue  # nothing to propagate from an unreached vertex
            for v, w in graph[u]:
                if dist[u] + w < dist[v]:
                    dist[v] = dist[u] + w
                    changed = True
        if not changed:
            break  # a full pass without improvement: distances are final

    # One extra pass: any further improvement reveals a negative cycle.
    for u in graph:
        for v, w in graph[u]:
            if dist[u] != infinity and dist[u] + w < dist[v]:
                print("Le graphe contient des cycles négatifs")
                return

    return dist
def dijkstra(graph, initial):
    """Compute shortest distances from ``initial`` with Dijkstra's algorithm.

    Args:
        graph: adjacency mapping ``{node: [(neighbor, cost), ...]}``.
        initial: source node.

    Returns:
        A tuple ``(distances, predecessors)``: ``distances`` maps each reached
        node to its cost from ``initial``; ``predecessors`` maps each reached
        node (other than ``initial``, unless re-reached more cheaply) to the
        node it was last improved from.
    """
    best = {initial: 0}
    predecessor = {}
    pending = set(graph.keys())

    while pending:
        # Only nodes that already have a tentative distance can be settled.
        reachable = [node for node in pending if node in best]
        if not reachable:
            break  # remaining nodes are not reachable from `initial`

        closest = min(reachable, key=best.__getitem__)
        pending.remove(closest)
        base_cost = best[closest]

        # Relax every outgoing edge of the node just settled.
        for target, step in graph[closest]:
            candidate = base_cost + step
            if target not in best or candidate < best[target]:
                best[target] = candidate
                predecessor[target] = closest

    return best, predecessor
We find the notion of relaxation: $d(j) \\rightarrow \\min(d(j), d(x) + G(x, j))$.\n", - "- Dijkstra does not tolerate negative costs and uses a priority queue to process vertices in order of increasing distance, so that each edge is relaxed only once.\n", - "- Bellman-Ford processes edges in an arbitrary order. It tolerates negative costs. For these reasons, multiple iterations might be necessary.\n", - "- Dijkstra on a graph where every edge has cost $1$ behaves like breadth-first search (the priority queue then acts as a plain FIFO queue).\n", - "\n" - ] - } - ], - "metadata": { - "celltoolbar": "Slideshow", - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/lectures/03-lists-search-sort slides.pdf b/lectures/03-lists-search-sort slides.pdf deleted file mode 100644 index 16211b335d5bdf808445c0ee3c80383405968fc4..0000000000000000000000000000000000000000 Binary files a/lectures/03-lists-search-sort slides.pdf and /dev/null differ diff --git a/lectures/03-stacks-queues slides.pdf b/lectures/03-stacks-queues slides.pdf new file mode 100644 index 0000000000000000000000000000000000000000..23cc8f2e9b8ea2f5db3d4318d3798e9ec6a342a6 Binary files /dev/null and b/lectures/03-stacks-queues slides.pdf differ diff --git a/lectures/04-05-06-programming-strategies slides.pdf b/lectures/04-05-06-programming-strategies slides.pdf deleted file mode 100644 index 940c58c7e6cb40e3d7413c3bb98ebb2f48c5b520..0000000000000000000000000000000000000000 Binary files a/lectures/04-05-06-programming-strategies slides.pdf and /dev/null differ diff --git a/lectures/07-stacks-queues slides.pdf b/lectures/07-stacks-queues slides.pdf deleted file mode 100644 index 
56d0f7dba512cf447aeaffc8ce9cc4d91400f21e..0000000000000000000000000000000000000000 Binary files a/lectures/07-stacks-queues slides.pdf and /dev/null differ diff --git a/lectures/08-binary-trees slides.pdf b/lectures/08-binary-trees slides.pdf deleted file mode 100644 index e3205b78a3cceb5ad61261c40580677e5349d71b..0000000000000000000000000000000000000000 Binary files a/lectures/08-binary-trees slides.pdf and /dev/null differ diff --git a/lectures/09-binary-trees-traversals slides.pdf b/lectures/09-binary-trees-traversals slides.pdf deleted file mode 100644 index 0290dbe2c9c6086029662c8fe663d53cf4ee2632..0000000000000000000000000000000000000000 Binary files a/lectures/09-binary-trees-traversals slides.pdf and /dev/null differ diff --git a/lectures/10-trees slides.pdf b/lectures/10-trees slides.pdf deleted file mode 100644 index ffe565a7777995c02e999beec0a90289698ebcd9..0000000000000000000000000000000000000000 Binary files a/lectures/10-trees slides.pdf and /dev/null differ diff --git a/lectures/11-graphs slides.pdf b/lectures/11-graphs slides.pdf deleted file mode 100644 index dd255643c69bc7f0bbb7f471dbfef32328d730e0..0000000000000000000000000000000000000000 Binary files a/lectures/11-graphs slides.pdf and /dev/null differ