{ "cells": [ { "cell_type": "markdown", "id": "anticipated-minority", "metadata": { "tags": [] }, "source": [ "# Visualizing Sorting Algorithms\n", "\n", "Sorting algorithms vary, for example, in their time and space complexity.\n", "From an artistic viewpoint, they also vary in the way they transiently reposition the elements of the array as the algorithm progresses.\n", "Here, we will animate this transient behavior." ] }, { "cell_type": "markdown", "id": "lucky-health", "metadata": { "tags": [] }, "source": [ "## Initial setup" ] }, { "cell_type": "code", "execution_count": 1, "id": "prerequisite-heaven", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "from matplotlib import animation\n", "from IPython.display import HTML" ] }, { "cell_type": "markdown", "id": "structural-masters", "metadata": { "tags": [] }, "source": [ "## Implement sorting algorithms\n", "\n", "Each sorting function will be implemented as a generator.\n", "This way, each algorithm will track its state implicitly and the resolution can be adjusted freely by positioning the `yield` calls.\n", "In addition, the raw data is mutable and will be passed by reference. Thus no unnecessary copies are made." ] }, { "cell_type": "code", "execution_count": 2, "id": "fiscal-fifth", "metadata": { "tags": [] }, "outputs": [], "source": [ "def bubblesort(array):\n", " n = len(array)\n", " for i in range(n - 1):\n", " for j in range(0, n - i - 1):\n", " if array[j] > array[j + 1]:\n", " array[j], array[j + 1] = array[j + 1], array[j]\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 3, "id": "excessive-freedom", "metadata": { "tags": [] }, "outputs": [], "source": [ "def insertionsort(array):\n", " for i in range(1, len(array)):\n", " key = array[i]\n", "\n", " j = i - 1\n", " while j >= 0 and key < array[j]:\n", " array[j + 1] = array[j]\n", " j -= 1\n", "\n", " yield array\n", "\n", " array[j + 1] = key\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 4, "id": "mysterious-tampa", "metadata": { "tags": [] }, "outputs": [], "source": [ "def selectionsort(array):\n", " n = len(array)\n", " for i in range(n):\n", " min_idx = i\n", " for j in range(i + 1, n):\n", " if array[min_idx] > array[j]:\n", " min_idx = j\n", "\n", " yield array\n", "\n", " array[i], array[min_idx] = array[min_idx], array[i]\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 5, "id": "advance-benjamin", "metadata": { "tags": [] }, "outputs": [], "source": [ "def cocktailsort(array):\n", " n = len(array)\n", " swapped = True\n", " start = 0\n", " end = n - 1\n", " while swapped:\n", " swapped = False\n", "\n", " for i in range(start, end):\n", " if array[i] > array[i + 1]:\n", " array[i], array[i + 1] = array[i + 1], array[i]\n", " swapped = True\n", "\n", " yield array\n", "\n", " if not swapped:\n", " break\n", "\n", " swapped = False\n", " end = end - 1\n", "\n", " for i in range(end - 1, start - 1, -1):\n", " if array[i] > array[i + 1]:\n", " array[i], array[i + 1] = array[i + 1], array[i]\n", " swapped = True\n", "\n", " yield array\n", "\n", " start = start + 1\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 6, "id": "patient-basics", "metadata": { "tags": [] }, "outputs": [], "source": [ "def partition(array, low, high):\n", " i = low - 1\n", " pivot = array[high]\n", "\n", " array_list = []\n", " for j in range(low, high):\n", " if array[j] < pivot:\n", " i = i + 1\n", " array[i], array[j] = array[j], array[i]\n", "\n", " array_list.append(array.copy())\n", "\n", " array[i + 1], array[high] = array[high], array[i + 1]\n", " array_list.append(array.copy())\n", "\n", " return i + 1, array_list\n", "\n", "\n", "def quicksort(array, low=0, high=None):\n", " if high is None:\n", " high = len(array) - 1\n", "\n", " if low < high:\n", " pi, array_list = partition(array, low, high)\n", " yield from array_list\n", "\n", " yield from quicksort(array, low, pi - 1)\n", " yield from quicksort(array, pi + 1, high)\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 7, "id": "committed-filter", "metadata": { "tags": [] }, "outputs": [], "source": [ "def mergesort(array, left_index=0, right_index=None):\n", " if right_index is None:\n", " right_index = len(array) - 1\n", "\n", " if left_index >= right_index:\n", " return\n", "\n", " mid = (left_index + right_index) // 2\n", " yield from mergesort(array, left_index=left_index, right_index=mid)\n", " yield from mergesort(array, left_index=mid + 1, right_index=right_index)\n", "\n", " left_copy = array[left_index : mid + 1].copy()\n", " right_copy = array[mid + 1 : right_index + 1].copy()\n", "\n", " left_copy_index = 0\n", " right_copy_index = 0\n", " sorted_index = left_index\n", "\n", " while left_copy_index < len(left_copy) and right_copy_index < len(right_copy):\n", " if left_copy[left_copy_index] <= right_copy[right_copy_index]:\n", " array[sorted_index] = left_copy[left_copy_index]\n", " left_copy_index = left_copy_index + 1\n", " else:\n", " array[sorted_index] = right_copy[right_copy_index]\n", " right_copy_index = right_copy_index + 1\n", " sorted_index = sorted_index + 1\n", "\n", " yield array\n", "\n", " while left_copy_index < len(left_copy):\n", " array[sorted_index] = left_copy[left_copy_index]\n", " left_copy_index = left_copy_index + 1\n", " sorted_index = sorted_index + 1\n", "\n", " yield array\n", "\n", " while right_copy_index < len(right_copy):\n", " array[sorted_index] = right_copy[right_copy_index]\n", " right_copy_index = right_copy_index + 1\n", " sorted_index = sorted_index + 1\n", "\n", " yield array\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 8, "id": "hollywood-banana", "metadata": { "tags": [] }, "outputs": [], "source": [ "def heapify(array, n, i):\n", " largest = i\n", " l = 2 * i + 1\n", " r = 2 * i + 2\n", "\n", " if l < n and array[largest] < array[l]:\n", " largest = l\n", " if r < n and array[largest] < array[r]:\n", " largest = r\n", "\n", " if largest != i:\n", " array[i], array[largest] = array[largest], array[i]\n", " yield array\n", "\n", " yield from heapify(array, n, largest)\n", "\n", "\n", "def heapsort(array):\n", " n = len(array)\n", "\n", " for i in range(n // 2 - 1, -1, -1):\n", " yield from heapify(array, n, i)\n", "\n", " yield array\n", "\n", " for i in range(n - 1, 0, -1):\n", " array[i], array[0] = array[0], array[i]\n", " yield from heapify(array, i, 0)\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 9, "id": "material-administrator", "metadata": { "tags": [] }, "outputs": [], "source": [ "def shellsort(array):\n", " n = len(array)\n", " gap = n // 2\n", "\n", " while gap > 0:\n", " for i in range(gap, n):\n", " temp = array[i]\n", "\n", " j = i\n", " while j >= gap and array[j - gap] > temp:\n", " array[j] = array[j - gap]\n", " j -= gap\n", "\n", " yield array\n", "\n", " array[j] = temp\n", "\n", " yield array\n", " gap //= 2" ] }, { "cell_type": "code", "execution_count": 10, "id": "severe-accounting", "metadata": { "tags": [] }, "outputs": [], "source": [ "def stoogesort(array, low=0, high=None):\n", " if high is None:\n", " high = len(array) - 1\n", "\n", " if low >= high:\n", " return\n", "\n", " if array[low] > array[high]:\n", " array[low], array[high] = array[high], array[low]\n", "\n", " yield array\n", "\n", " if high - low + 1 > 2:\n", " sep = (high - low + 1) // 3\n", "\n", " yield from stoogesort(array, low, high - sep)\n", " yield from stoogesort(array, low + sep, high)\n", " yield from stoogesort(array, low, high - sep)\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 11, "id": "dependent-composition", "metadata": { "tags": [] }, "outputs": [], "source": [ "def combsort(array):\n", " n = len(array)\n", " shrink_factor = 1.3\n", " _gap = n\n", " sorted_ = False\n", "\n", " while not sorted_:\n", " _gap /= shrink_factor\n", " gap = int(_gap)\n", "\n", " if gap <= 1:\n", " sorted_ = True\n", " gap = 1\n", "\n", " for i in range(n - gap):\n", " if array[i] > array[i + gap]:\n", " array[i], array[i + gap] = array[i + gap], array[i]\n", " sorted_ = False\n", "\n", " yield array" ] }, { "cell_type": "code", "execution_count": 12, "id": "moral-encyclopedia", "metadata": { "tags": [] }, "outputs": [], "source": [ "algorithm_list = [\n", " bubblesort,\n", " cocktailsort,\n", " combsort,\n", " heapsort,\n", " insertionsort,\n", " mergesort,\n", " quicksort,\n", " selectionsort,\n", " shellsort,\n", " # stoogesort, # takes a long time\n", "]" ] }, { "cell_type": "markdown", "id": "exact-description", "metadata": { "tags": [] }, "source": [ "## Try out one of the algorithms\n", "\n", "Let's see how each generator yields intermediate arrays until it finally sorts it completely." ] }, { "cell_type": "code", "execution_count": 13, "id": "senior-oasis", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial array: [3 2 1 0]\n", "At iteration 0: [2 3 1 0]\n", "At iteration 1: [2 1 3 0]\n", "At iteration 2: [2 1 0 3]\n", "At iteration 3: [1 2 0 3]\n", "At iteration 4: [1 0 2 3]\n", "At iteration 5: [0 1 2 3]\n", "Final array: [0 1 2 3]\n" ] } ], "source": [ "array = np.arange(4)[::-1]\n", "\n", "print('Initial array:', array)\n", "for i, arr in enumerate(bubblesort(array)):\n", " print(f'At iteration {i}:', arr)\n", "print('Final array:', array)" ] }, { "cell_type": "markdown", "id": "settled-george", "metadata": { "tags": [] }, "source": [ "## Create animations\n", "\n", "The `animate` function is straight-forward, it simply updates the data in each plot.\n", "The data is updated in the `step` function.\n", "Here, a generator is created for each sorting instance (row of each data block) and then executed until all generators are exhausted." ] }, { "cell_type": "code", "execution_count": 14, "id": "naughty-science", "metadata": { "tags": [] }, "outputs": [], "source": [ "def animate(data_list, im_list):\n", " \"\"\"Put each data block in appropriate image.\"\"\"\n", " for data, im in zip(data_list, im_list):\n", " im.set_data(data)\n", " return im_list\n", "\n", "\n", "def step():\n", " \"\"\"Run algorithms.\"\"\"\n", " # initialize sorting functions\n", " generator_list = [\n", " sort_function(row)\n", " for block, sort_function in zip(data_list, algorithm_list)\n", " for row in block\n", " ]\n", "\n", " while True:\n", " has_stopped = 0\n", " for gen in generator_list:\n", " try:\n", " # advance each sorting algorithm by one step\n", " next(gen)\n", " except StopIteration:\n", " has_stopped += 1\n", "\n", " if len(generator_list) == has_stopped:\n", " # all lists are sorted when all generators are exhausted\n", " break\n", "\n", " # yield intermediate result for plotting of animation frame\n", " yield data_list\n", "\n", "\n", "def create_animation(array_length, repetition_num, max_iter_num=50_000, block_size=3):\n", " # TODO: get rid of evil global variables\n", " global data_list\n", "\n", " # generate unsorted data\n", " block = np.repeat([np.arange(array_length).astype(float)], repetition_num, axis=0)\n", " [np.random.shuffle(row) for row in block]\n", "\n", " data_list = [block.copy() for _ in algorithm_list]\n", "\n", " # setup figure\n", " size = np.ceil(np.sqrt(len(algorithm_list))).astype(int)\n", "\n", " fig, ax_grid = plt.subplots(\n", " nrows=size,\n", " ncols=size,\n", " figsize=(size * block_size, size * block_size),\n", " constrained_layout=True,\n", " )\n", " ax_list = ax_grid.ravel()\n", "\n", " im_list = [\n", " ax.imshow(data, interpolation='nearest') for ax, data in zip(ax_list, data_list)\n", " ]\n", " [ax.axis('off') for ax in ax_list]\n", " [ax.set_title(alg.__name__) for ax, alg in zip(ax_list, algorithm_list)]\n", "\n", " # create animation\n", " return animation.FuncAnimation(\n", " fig=fig,\n", " func=animate,\n", " frames=step, # init_func=init_func, # TODO: use init_func to start with appropriate frame\n", " save_count=max_iter_num,\n", " interval=10,\n", " repeat=True,\n", " fargs=(im_list,),\n", " blit=True,\n", " )" ] }, { "cell_type": "code", "execution_count": 15, "id": "cellular-fortune", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 6.16 s, sys: 232 ms, total: 6.39 s\n", "Wall time: 6.17 s\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "