diff --git a/scripts/highway_planning.ipynb b/scripts/highway_planning.ipynb
index 4fd9a87c0..b1722e1bb 100644
--- a/scripts/highway_planning.ipynb
+++ b/scripts/highway_planning.ipynb
@@ -1,100 +1,100 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "QKWvMXWMBEJA"
-      },
-      "source": [
-        "# Behavioural Planning for Autonomous Highway Driving\n",
-        "\n",
-        "We plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm.\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "s-ghXis6A_md"
-      },
-      "outputs": [],
-      "source": [
-        "#@title Imports for env, agent, and visualisation.\n",
-        "# Environment\n",
-        "!pip install highway-env\n",
-        "import gymnasium as gym\n",
-        "import highway_env\n",
-        "\n",
-        "# Agent\n",
-        "!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents\n",
-        "from rl_agents.agents.common.factory import agent_factory\n",
-        "\n",
-        "# Visualisation\n",
-        "import sys\n",
-        "from tqdm.notebook import trange\n",
-        "!pip install moviepy -U\n",
-        "!pip install imageio_ffmpeg\n",
-        "!pip install pyvirtualdisplay\n",
-        "!apt-get install -y xvfb ffmpeg\n",
-        "!git clone https://github.com/Farama-Foundation/HighwayEnv.git\n",
-        "sys.path.insert(0, './highway-env/scripts/')\n",
-        "from utils import record_videos, show_videos\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "bgNDDWwqCj8l"
-      },
-      "outputs": [],
-      "source": [
-        "#@title Run an episode\n",
-        "\n",
-        "# Make environment\n",
-        "env = gym.make(\"highway-fast-v0\", render_mode=\"rgb_array\")\n",
-        "env = record_videos(env)\n",
-        "(obs, info), done = env.reset(), False\n",
-        "\n",
-        "# Make agent\n",
-        "agent_config = {\n",
-        "    \"__class__\": \"<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>\",\n",
-        "    \"env_preprocessors\": [{\"method\":\"simplify\"}],\n",
-        "    \"budget\": 50,\n",
-        "    \"gamma\": 0.7,\n",
-        "}\n",
-        "agent = agent_factory(env, agent_config)\n",
-        "\n",
-        "# Run episode\n",
-        "for step in trange(env.unwrapped.config[\"duration\"], desc=\"Running...\"):\n",
-        "    action = agent.act(obs)\n",
-        "    obs, reward, done, truncated, info = env.step(action)\n",
-        "    \n",
-        "env.close()\n",
-        "show_videos()"
-      ]
-    }
-  ],
-  "metadata": {
-    "colab": {
-      "name": "highway-planning.ipynb",
-      "provenance": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "pycharm": {
-      "stem_cell": {
-        "cell_type": "raw",
-        "metadata": {
-          "collapsed": false
-        },
-        "source": []
-      }
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "QKWvMXWMBEJA"
+   },
+   "source": [
+    "# Behavioural Planning for Autonomous Highway Driving\n",
+    "\n",
+    "We plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm.\n"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 0
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "s-ghXis6A_md"
+   },
+   "source": [
+    "#@title Imports for env, agent, and visualisation.\n",
+    "# Environment\n",
+    "!pip install highway-env\n",
+    "import gymnasium as gym\n",
+    "import highway_env\n",
+    "\n",
+    "# Agent\n",
+    "!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents\n",
+    "from rl_agents.agents.common.factory import agent_factory\n",
+    "\n",
+    "# Visualisation\n",
+    "import sys\n",
+    "from tqdm.notebook import trange\n",
+    "!pip install moviepy -U\n",
+    "!pip install imageio_ffmpeg\n",
+    "!pip install pyvirtualdisplay\n",
"!apt-get install -y xvfb ffmpeg\n", + "!git clone https://github.com/Farama-Foundation/HighwayEnv.git\n", + "sys.path.insert(0, './highway-env/scripts/')\n", + "from utils import record_videos, show_videos\n" + ], + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bgNDDWwqCj8l" + }, + "source": [ + "#@title Run an episode\n", + "\n", + "# Make environment\n", + "env = gym.make(\"highway-fast-v0\", render_mode=\"rgb_array\")\n", + "env = record_videos(env)\n", + "(obs, info), done = env.reset(), False\n", + "\n", + "# Make agent\n", + "agent_config = {\n", + " \"__class__\": \"\",\n", + " \"env_preprocessors\": [{\"method\":\"simplify\"}],\n", + " \"budget\": 50,\n", + " \"gamma\": 0.7,\n", + "}\n", + "agent = agent_factory(env, agent_config)\n", + "\n", + "# Run episode\n", + "for step in trange(env.unwrapped.config[\"duration\"], desc=\"Running...\"):\n", + " action = agent.act(obs)\n", + " obs, reward, done, truncated, info = env.step(action)\n", + " \n", + "env.close()\n", + "show_videos()" + ], + "outputs": [] + } + ], + "metadata": { + "colab": { + "name": "highway-planning.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/scripts/intersection_social_dqn.ipynb b/scripts/intersection_social_dqn.ipynb index e29f32acb..bbfb2aaf2 100644 --- a/scripts/intersection_social_dqn.ipynb +++ b/scripts/intersection_social_dqn.ipynb @@ -1,179 +1,179 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "sepDWoBqdRMK" - }, - "source": [ - "# Training a DQN with social attention on `intersection-v0`\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Kx8X4s8krNWt" - }, - "outputs": [], - "source": [ - "#@title Import requirements\n", - "\n", - "# Environment\n", - "!pip install highway-env\n", - "import gymnasium as gym\n", - "\n", - "# Agent\n", - "!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents\n", - "\n", - "# Visualisation utils\n", - "!pip install moviepy\n", - "!pip install imageio_ffmpeg\n", - "import sys\n", - "%load_ext tensorboard\n", - "!pip install tensorboardx gym pyvirtualdisplay\n", - "!apt-get install -y xvfb ffmpeg\n", - "!git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null\n", - "sys.path.insert(0, '/content/HighwayEnv/scripts/')\n", - "from utils import show_videos" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vvOEW00pdHrG" - }, - "source": [ - "## Training\n", - "\n", - "We use a policy architecture based on social attention, see [[Leurent and Mercat, 2019]](https://arxiv.org/abs/1911.12250).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QowKW3ix45ZW" - }, - "outputs": [], - "source": [ - "#@title Prepare environment, agent, and evaluation process.\n", - "\n", - "NUM_EPISODES = 3000 #@param {type: \"integer\"}\n", - "\n", - "from rl_agents.trainer.evaluation import Evaluation\n", - "from rl_agents.agents.common.factory import load_agent, load_environment\n", - "\n", - "# Get the environment and agent configurations from the rl-agents repository\n", - "!git clone https://github.com/eleurent/rl-agents.git 2> /dev/null\n", - "%cd /content/rl-agents/scripts/\n", - "env_config = 
-        "env_config = 'configs/IntersectionEnv/env.json'\n",
-        "agent_config = 'configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json'\n",
-        "\n",
-        "env = load_environment(env_config)\n",
-        "agent = load_agent(agent_config, env)\n",
-        "evaluation = Evaluation(env, agent, num_episodes=NUM_EPISODES, display_env=False, display_agent=False)\n",
-        "print(f\"Ready to train {agent} on {env}\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "nqnGqW6jd1xN"
-      },
-      "source": [
-        "Run tensorboard locally to visualize training."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "q7QJY2wc4_1N"
-      },
-      "outputs": [],
-      "source": [
-        "%tensorboard --logdir \"{evaluation.directory}\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "BtK9dtfb0JMF"
-      },
-      "source": [
-        "Start training. This should take about an hour."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "sFVq1gFz42Eg"
-      },
-      "outputs": [],
-      "source": [
-        "evaluation.train()"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-lNvWg42RWiw"
-      },
-      "source": [
-        "Progress can be visualised in the tensorboard cell above, which should update every 30s (or manually). You may need to click the *Fit domain to data* buttons below each graph."
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VKfvu5uhzCIU"
-      },
-      "source": [
-        "## Testing"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "gY0rpVYUtRpN"
-      },
-      "outputs": [],
-      "source": [
-        "#@title Run the learned policy for a few episodes.\n",
-        "env = load_environment(env_config)\n",
-        "env.config[\"offscreen_rendering\"] = True\n",
-        "agent = load_agent(agent_config, env)\n",
-        "evaluation = Evaluation(env, agent, num_episodes=20, training = False, recover = True)\n",
-        "evaluation.test()\n",
-        "show_videos(evaluation.run_directory)"
-      ]
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "name": "SocialAttentionDQN",
-      "provenance": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "pycharm": {
-      "stem_cell": {
-        "cell_type": "raw",
-        "metadata": {
-          "collapsed": false
-        },
-        "source": []
-      }
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "sepDWoBqdRMK"
+   },
+   "source": [
+    "# Training a DQN with social attention on `intersection-v0`\n",
+    "\n"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 0
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "Kx8X4s8krNWt"
+   },
+   "source": [
+    "#@title Import requirements\n",
+    "\n",
+    "# Environment\n",
+    "!pip install highway-env\n",
+    "import gymnasium as gym\n",
+    "\n",
+    "# Agent\n",
+    "!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents\n",
+    "\n",
+    "# Visualisation utils\n",
+    "!pip install moviepy\n",
+    "!pip install imageio_ffmpeg\n",
+    "import sys\n",
+    "%load_ext tensorboard\n",
+    "!pip install tensorboardx gym pyvirtualdisplay\n",
+    "!apt-get install -y xvfb ffmpeg\n",
+    "!git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null\n",
+    "sys.path.insert(0, '/content/HighwayEnv/scripts/')\n",
+    "from utils import show_videos"
+   ],
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "vvOEW00pdHrG"
+   },
+   "source": [
+    "## Training\n",
+    "\n",
+    "We use a policy architecture based on social attention, see [[Leurent and Mercat, 2019]](https://arxiv.org/abs/1911.12250).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
"execution_count": null, + "metadata": { + "id": "QowKW3ix45ZW" + }, + "source": [ + "#@title Prepare environment, agent, and evaluation process.\n", + "\n", + "NUM_EPISODES = 3000 #@param {type: \"integer\"}\n", + "\n", + "from rl_agents.trainer.evaluation import Evaluation\n", + "from rl_agents.agents.common.factory import load_agent, load_environment\n", + "\n", + "# Get the environment and agent configurations from the rl-agents repository\n", + "!git clone https://github.com/eleurent/rl-agents.git 2> /dev/null\n", + "%cd /content/rl-agents/scripts/\n", + "env_config = 'configs/IntersectionEnv/env.json'\n", + "agent_config = 'configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json'\n", + "\n", + "env = load_environment(env_config)\n", + "agent = load_agent(agent_config, env)\n", + "evaluation = Evaluation(env, agent, num_episodes=NUM_EPISODES, display_env=False, display_agent=False)\n", + "print(f\"Ready to train {agent} on {env}\")" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqnGqW6jd1xN" + }, + "source": [ + "Run tensorboard locally to visualize training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q7QJY2wc4_1N" + }, + "source": [ + "%tensorboard --logdir \"{evaluation.directory}\"" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BtK9dtfb0JMF" + }, + "source": [ + "Start training. This should take about an hour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sFVq1gFz42Eg" + }, + "source": [ + "evaluation.train()" + ], + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-lNvWg42RWiw" + }, + "source": [ + "Progress can be visualised in the tensorboard cell above, which should update every 30s (or manually). You may need to click the *Fit domain to data* buttons below each graph." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VKfvu5uhzCIU" + }, + "source": [ + "## Testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gY0rpVYUtRpN" + }, + "source": [ + "#@title Run the learned policy for a few episodes.\n", + "env = load_environment(env_config)\n", + "env.config[\"offscreen_rendering\"] = True\n", + "agent = load_agent(agent_config, env)\n", + "evaluation = Evaluation(env, agent, num_episodes=20, training = False, recover = True)\n", + "evaluation.test()\n", + "show_videos(evaluation.run_directory)" + ], + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "SocialAttentionDQN", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 }