Add notebook 17, optimal control

Atcold · Atcold · commit ee22cf4e6c2e · 2021-07-10T15:06:47.000-04:00
diff --git a/17-optimal_control.ipynb b/17-optimal_control.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from torch import nn\n",
+    "from torch.optim import SGD\n",
+    "from matplotlib import pyplot as plt\n",
+    "from matplotlib import patches\n",
+    "import matplotlib as mpl\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dark figure theme shared by every plot below\n",
+    "plt.style.use(['dark_background', 'bmh'])\n",
+    "plt.rc('axes', facecolor='k')\n",
+    "plt.rc('figure', facecolor='k', figsize=(10, 6), dpi=100)  # (17, 10)\n",
+    "plt.rc('savefig', bbox='tight')\n",
+    "plt.rc('axes', labelsize=36)\n",
+    "plt.rc('legend', fontsize=24)\n",
+    "# Text is typeset through LaTeX\n",
+    "plt.rc('text', usetex=True)\n",
+    "# The preamble has to be a plain string: recent Matplotlib releases reject a list here\n",
+    "plt.rcParams['text.latex.preamble'] = r'\\usepackage{bm}'\n",
+    "plt.rc('lines', markersize=10)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The state transition equation is the following:\n",
+    "\n",
+    "$$\\def \\vx {\\boldsymbol{\\color{Plum}{x}}}\n",
+    "\\def \\vu {\\boldsymbol{\\color{orange}{u}}}\n",
+    "\\dot{\\vx} = f(\\vx, \\vu) \\quad\n",
+    "\\left\\{\n",
+    "\\begin{array}{l}\n",
+    "\\dot{x} = s \\cos \\theta \\\\\n",
+    "\\dot{y} = s \\sin \\theta \\\\\n",
+    "\\dot{\\theta} = \\frac{s}{L} \\tan \\phi \\\\\n",
+    "\\dot{s} = a\n",
+    "\\end{array}\n",
+    "\\right. \\quad\n",
+    "\\vx = (x\\;y\\;\\theta\\;s) \\quad\n",
+    "\\vu = (\\phi\\;a)\n",
+    "$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f(x, u, t=None, L=1):\n",
+    "    \"\"\"\n",
+    "    Kinematic model for tricycle\n",
+    "    ẋ(t) = f[x(t), u(t), t]\n",
+    "    x: states (x, y, θ, s)\n",
+    "    u: controls (ϕ, a); ϕ enters θ̇ through tan ϕ, a is the rate of change of s\n",
+    "    t: time (accepted for the general signature, not used by this model)\n",
+    "    L: length that scales the turning rate, in m (presumably the wheelbase)\n",
+    "    returns: ẋ = dx/dt = (s cos θ, s sin θ, s / L tan ϕ, a) as a tensor with 4 entries\n",
+    "    Euler step: x' = x + f(x, u, t) * dt\n",
+    "    \"\"\"\n",
+    "    # Only heading and speed drive the dynamics; the position itself is not needed\n",
+    "    _, _, θ, s = x\n",
+    "    ϕ, a = u\n",
+    "    # Stacking (rather than filling a fresh torch.zeros(4)) keeps the dtype and device of the inputs\n",
+    "    return torch.stack((\n",
+    "        s * torch.cos(θ),\n",
+    "        s * torch.sin(θ),\n",
+    "        s / L * torch.tan(ϕ),\n",
+    "        a,\n",
+    "    ))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def draw_car(ax, x, y, θ, width=0.4, length=1.0):\n",
+    "    \"\"\"\n",
+    "    Draw an unfilled grey rectangle of size length × width on ax\n",
+    "    (x, y) is the midpoint of the edge from which the rectangle extends by `length`\n",
+    "    θ rotates the rectangle around (x, y)\n",
+    "    \"\"\"\n",
+    "    # Rotate in data coordinates first, then hand over to the usual data transform\n",
+    "    pose = mpl.transforms.Affine2D().rotate_around(x, y, θ) + ax.transData\n",
+    "    corner = (x, y - width / 2)\n",
+    "    outline = patches.Rectangle(\n",
+    "        corner, length, width,\n",
+    "        transform=pose, alpha=0.8, fill=False, ec='grey',\n",
+    "    )\n",
+    "    ax.add_patch(outline)\n",
+    "    \n",
+    "def plot_τ(ax, τ, car=False, ax_lims=None):\n",
+    "    \"\"\"\n",
+    "    Plot trajectory of vehicles\n",
+    "    ax: axes to draw on\n",
+    "    τ: trajectory, one row per time step; columns 0, 1, 2 are read as x, y, θ\n",
+    "    car: if True, draw a car outline at every pose of the trajectory\n",
+    "    ax_lims is a tuple of two tuples ((x_lim_left, x_lim_right), (y_lim_bottom, y_lim_top))\n",
+    "    \"\"\"\n",
+    "    if ax_lims is None:\n",
+    "        ax_lims = ((-1, 7), (-2, 2))\n",
+    "    ax.plot(τ[:,0], τ[:,1], 'o-')\n",
+    "    ax.set_aspect('equal')\n",
+    "    ax.grid(True)\n",
+    "    ax.autoscale(False)\n",
+    "    ax.set_xlabel(r'$x \\; [\\mathrm{m}]$')\n",
+    "    ax.set_ylabel(r'$y \\; [\\mathrm{m}]$')\n",
+    "    \n",
+    "    ax.set_xlim(*ax_lims[0])\n",
+    "    ax.set_ylim(*ax_lims[1])\n",
+    "    # One tick per unit, including the upper limit\n",
+    "    ax.set_xticks(torch.arange(ax_lims[0][0], ax_lims[0][1] + 1, 1))\n",
+    "    ax.set_yticks(torch.arange(ax_lims[1][0], ax_lims[1][1] + 1, 1))\n",
+    "        \n",
+    "    # Title and car outlines go on the axes that was passed in, not on whichever one is current\n",
+    "    ax.set_title('Trajectory')\n",
+    "    if car:\n",
+    "        for x, y, θ in τ[:, :3]:\n",
+    "            draw_car(ax, x, y, θ)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Manual driving\n",
+    "x = torch.tensor((0, 0, 0, 1),dtype=torch.float32)\n",
+    "# Candidate control sequences: every assignment overrides the previous one,\n",
+    "# so only the last uncommented `u` is simulated\n",
+    "# Optimal action from back propagation\n",
+    "u = torch.tensor([\n",
+    "    [0.1280, 0.0182],\n",
+    "    [0.0957, 0.0131],\n",
+    "    [0.0637, 0.0085],\n",
+    "    [0.0318, 0.0043],\n",
+    "    [0.0000, 0.0000]\n",
+    "])\n",
+    "# Brake\n",
+    "u = torch.ones(10, 2) * -0.1\n",
+    "u[:, 0] = 0\n",
+    "# S\n",
+    "u = torch.zeros(10, 2)\n",
+    "u[:5, 0] = 0.2\n",
+    "u[5:, 0] = -0.2\n",
+    "# Straight\n",
+    "# u = torch.zeros(10, 2)\n",
+    "\n",
+    "dt = 1  # s\n",
+    "steps = len(u)  # one Euler step per row of controls, whichever sequence is active\n",
+    "trajectory = [x.clone()]\n",
+    "for t in range(steps):\n",
+    "    x += f(x, u[t]) * dt\n",
+    "    print(x)\n",
+    "    # x is updated in place, so store a copy\n",
+    "    trajectory.append(x.clone())\n",
+    "τ = torch.stack(trajectory)\n",
+    "\n",
+    "# plt.plot(0,0,'gx', markersize=20, markeredgewidth=5)\n",
+    "# plt.plot(5,1,'rx', markersize=20, markeredgewidth=5)\n",
+    "plot_τ(plt.gca(), τ, car=True)\n",
+    "\n",
+    "plt.axis((-1, 10, -1, 5))\n",
+    "name = 'S'\n",
+    "\n",
+    "# plt.axis((-1, 12, -3, 3))\n",
+    "# name = 'straight'\n",
+    "\n",
+    "# plt.axis((-1, 7, -2, 2))\n",
+    "# name = 'brake'\n",
+    "\n",
+    "# plt.savefig(f'{name}.pdf')\n",
+    "\n",
+    "plt.figure(figsize=(6, 2))\n",
+    "plt.title('Control signal')\n",
+    "# The two controls are offset by ±0.1 around each time index so their stems do not overlap.\n",
+    "# `use_line_collection` is not passed: it is the default since Matplotlib 3.3 and the keyword was later removed\n",
+    "plt.stem(np.arange(steps)+0.9, u[:,0], 'C1', markerfmt='C1o', basefmt='none')\n",
+    "plt.stem(np.arange(steps)+1.1, u[:,1], 'C2', markerfmt='C2o', basefmt='none')\n",
+    "plt.ylim((-0.5, 0.5))\n",
+    "plt.xticks(np.arange(steps + 2))\n",
+    "plt.xlabel('discrete time index', fontsize=12)\n",
+    "# plt.savefig(f'{name}-ctrl.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Costs definition\n",
+    "# x: states (x, y, θ, s)\n",
+    "def vanilla_cost(state, target):\n",
+    "    \"\"\"Squared distance between the last (x, y) of the trajectory and the target\"\"\"\n",
+    "    goal_x, goal_y = target\n",
+    "    final = state[-1]\n",
+    "    dx = final[0] - goal_x\n",
+    "    dy = final[1] - goal_y\n",
+    "    return dx.pow(2) + dy.pow(2)\n",
+    "\n",
+    "def cost_with_target_s(state, target):\n",
+    "    \"\"\"Squared distance of the last (x, y) to the target, plus the squared last speed\"\"\"\n",
+    "    goal_x, goal_y = target\n",
+    "    final = state[-1]\n",
+    "    position_term = (final[0] - goal_x).pow(2) + (final[1] - goal_y).pow(2)\n",
+    "    # The last state component is s, so this term is smallest when s is zero at the end\n",
+    "    speed_term = final[-1].pow(2)\n",
+    "    return position_term + speed_term\n",
+    "\n",
+    "def cost_sum_distances(state, target):\n",
+    "    \"\"\"Distance to the target averaged over all time steps (a mean, despite the name)\"\"\"\n",
+    "    goal_x, goal_y = target\n",
+    "    dx = state[:, 0] - goal_x\n",
+    "    dy = state[:, 1] - goal_y\n",
+    "    dists = (dx.pow(2) + dy.pow(2)).pow(0.5)\n",
+    "    return dists.mean()\n",
+    "\n",
+    "def cost_sum_square_distances(state, target):\n",
+    "    \"\"\"Squared distance to the target averaged over all time steps (a mean, despite the name)\"\"\"\n",
+    "    goal_x, goal_y = target\n",
+    "    dx = state[:, 0] - goal_x\n",
+    "    dy = state[:, 1] - goal_y\n",
+    "    sq_dists = dx.pow(2) + dy.pow(2)\n",
+    "    return sq_dists.mean()\n",
+    "\n",
+    "def cost_logsumexp(state, target):\n",
+    "    \"\"\"Soft minimum over time steps of the squared distance to the target: -log Σ exp(-d²)\"\"\"\n",
+    "    goal_x, goal_y = target\n",
+    "    dx = state[:, 0] - goal_x\n",
+    "    dy = state[:, 1] - goal_y\n",
+    "    # Squared distances are used as they are (no square root)\n",
+    "    sq_dists = dx.pow(2) + dy.pow(2)\n",
+    "    return -torch.logsumexp(-sq_dists, dim=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Path planning\n",
+    "def path_planning_with_cost(x_x, x_y, s, T, epochs, stepsize, cost_f, ax=None, ax_lims=None, debug=False):\n",
+    "    \"\"\"\n",
+    "    Path planning for tricycle\n",
+    "    x_x: x component of the target position vector\n",
+    "    x_y: y component of the target position vector\n",
+    "    s: initial speed\n",
+    "    T: time steps\n",
+    "    epochs: number of epochs for back propagation\n",
+    "    stepsize: stepsize for back propagation\n",
+    "    cost_f: cost function that takes the trajectory and the tuple (x, y) - target.\n",
+    "    ax: axis to plot the trajectory (the current axis if None)\n",
+    "    ax_lims: axis limits, forwarded to plot_τ\n",
+    "    debug: if True, print the controls after every update\n",
+    "    returns: list with the cost of every epoch, each evaluated before that epoch's update\n",
+    "    \"\"\"\n",
+    "    if ax is None:\n",
+    "        ax = plt.gca()\n",
+    "    # Start (green) and target (red) markers, drawn on the same axis as the trajectories\n",
+    "    ax.plot(0, 0, 'gx', markersize=20, markeredgewidth=5)\n",
+    "    ax.plot(x_x, x_y, 'rx', markersize=20, markeredgewidth=5)\n",
+    "    # The controls are the only optimised quantity; they start at zero\n",
+    "    u = nn.Parameter(torch.zeros(T, 2))\n",
+    "    optimizer = SGD((u,), lr=stepsize)\n",
+    "    dt = 1  # s\n",
+    "    costs = []\n",
+    "    for epoch in range(epochs):\n",
+    "        # Roll the model out from the origin with the current controls\n",
+    "        x = [torch.tensor((0, 0, 0, s),dtype=torch.float32)]\n",
+    "        for t in range(T):\n",
+    "            x.append(x[-1] + f(x[-1], u[t]) * dt)\n",
+    "        x_t = torch.stack(x)\n",
+    "        # Detached copy for plotting, so drawing never touches the autograd graph\n",
+    "        τ = x_t.detach()\n",
+    "        cost = cost_f(x_t, (x_x, x_y))\n",
+    "        costs.append(cost.item())\n",
+    "        optimizer.zero_grad()\n",
+    "        cost.backward()\n",
+    "        optimizer.step()\n",
+    "        if debug: \n",
+    "            print(u.data)\n",
+    "        # Only plot the first and last trajectories\n",
+    "        if epoch == 0: \n",
+    "            plot_τ(ax, τ, ax_lims=ax_lims)\n",
+    "        if epoch == epochs-1:\n",
+    "            plot_τ(ax, τ, car=True, ax_lims=ax_lims)\n",
+    "    return costs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Short demo: five gradient steps on a five-step horizon, final-position cost only\n",
+    "path_planning_with_cost(\n",
+    "    x_x=5, x_y=1, s=1,\n",
+    "    T=5, epochs=5, stepsize=0.01,\n",
+    "    cost_f=vanilla_cost, debug=False,\n",
+    ");"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One subplot per horizon T, optimising only the final position\n",
+    "horizons = range(5, 16)\n",
+    "planner_args = dict(x_x=5, x_y=1, s=1, epochs=50, stepsize=0.001, cost_f=vanilla_cost, debug=False)\n",
+    "\n",
+    "plt.figure(dpi=100, figsize=(10, 55))\n",
+    "for i in horizons:\n",
+    "    ax = plt.subplot(len(horizons), 1, i - horizons[0] + 1)\n",
+    "    path_planning_with_cost(T=i, ax=ax, **planner_args)\n",
+    "    # Replaces the generic title set while plotting the trajectory\n",
+    "    plt.title(f'T={i}')\n",
+    "plt.tight_layout()\n",
+    "plt.suptitle('Using just final position for the cost', y=1.01)\n",
+    "# plt.savefig('final-position.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One subplot per horizon T, optimising the final position and the final speed\n",
+    "horizons = range(5, 16)\n",
+    "planner_args = dict(x_x=5, x_y=1, s=1, epochs=50, stepsize=0.001, cost_f=cost_with_target_s, debug=False)\n",
+    "\n",
+    "plt.figure(dpi=100, figsize=(10, 55))\n",
+    "plt.suptitle('Using final position and speed for the cost', y=1.01)\n",
+    "for i in horizons:\n",
+    "    ax = plt.subplot(len(horizons), 1, i - horizons[0] + 1)\n",
+    "    path_planning_with_cost(T=i, ax=ax, **planner_args)\n",
+    "    # Replaces the generic title set while plotting the trajectory\n",
+    "    plt.title(f\"T={i}\")\n",
+    "plt.tight_layout()\n",
+    "# plt.savefig('final-position-and-speed.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One subplot per horizon T; the cost averages the squared distance to the target over the whole trajectory\n",
+    "plt.figure(dpi=100, figsize=(10, 55))\n",
+    "# The title names the cost that is actually passed below (cost_sum_square_distances)\n",
+    "plt.suptitle('Using mean of squared distances for the cost', y=1.01)\n",
+    "for i in range(5, 16):\n",
+    "    ax = plt.subplot(11, 1, i - 5 + 1)\n",
+    "    # The result is not bound to a name: nothing in this cell reads it\n",
+    "    path_planning_with_cost(x_x=5, x_y=1, s=1, T=i, epochs=40, stepsize=0.0025, ax=ax, cost_f=cost_sum_square_distances, debug=False)\n",
+    "    plt.title(f\"T={i}\")\n",
+    "    plt.gca().set_aspect(\"equal\")\n",
+    "plt.tight_layout()\n",
+    "# plt.savefig('average-distance.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One subplot per horizon T, optimising the soft minimum of the squared distances\n",
+    "horizons = range(5, 16)\n",
+    "planner_args = dict(x_x=5, x_y=1, s=1, epochs=100, stepsize=0.005, cost_f=cost_logsumexp, debug=False)\n",
+    "\n",
+    "plt.figure(dpi=100, figsize=(10, 55))\n",
+    "plt.suptitle('Using softmin of distances for the cost (focusing on the points closest to target)', y=1.01)\n",
+    "for i in horizons:\n",
+    "    ax = plt.subplot(len(horizons), 1, i - horizons[0] + 1)\n",
+    "    path_planning_with_cost(T=i, ax=ax, **planner_args)\n",
+    "    # Replaces the generic title set while plotting the trajectory\n",
+    "    plt.title(f\"T={i}\")\n",
+    "plt.tight_layout()\n",
+    "# Unlike the other sweeps, this one writes its figure to disk\n",
+    "plt.savefig('softmin.pdf')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}