instadeepai · alaterre · Mar 17, 2023 · Aug 16, 2022 · Aug 16, 2022 · Aug 17, 2022
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -42,7 +42,7 @@ body:
 - type: input
   attributes:
     label: What Jumanji version are you using?
-    placeholder: For example jumanji v0.1.1
+    placeholder: For example jumanji v0.2.0
 - type: input
   attributes:
     label: Which accelerator(s) are you using?

diff --git a/.github/workflows/tests_linters.yml b/.github/workflows/tests_linters.yml
@@ -9,7 +9,7 @@ jobs:
 
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9"]
+        python-version: ["3.8", "3.9"]
         os: [ubuntu-latest]
 
     steps:
@@ -21,7 +21,7 @@ jobs:
         with:
             python-version: "${{ matrix.python-version }}"
       - name: Install python dependencies 🔧
-        run: pip install .[dev]
+        run: pip install .[dev,train]
       - name: Run linters 🖌️
         run: pre-commit run --all-files --verbose
       - name: Run tests 🧪

diff --git a/.gitignore b/.gitignore
@@ -151,3 +151,5 @@ cython_debug/
 
 3.8/
 jumanji_env/
+**/outputs/
+*.xml
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -29,13 +29,15 @@ Before sending your pull request for review, make sure your changes are consiste
 
 #### When contributing a new environment
 
-When contributing a new environment, make sure to do the following:
-1. Implement all the functions of the Environment abstraction: step, reset, specs, etc.
-2. Implement unit tests for every function used by the environment, including a `test_[your_env]__does_not_smoke` using the testing utils.
-3. Add an environment README in the `docs/environments/` folder describing the environment you implemented.
-4. Add an image (or gif) in the readme above. Images are located in `docs/img/`.
-5. Update the documentation api in `docs/api/environments/` to add your environment to the doc.
-6. Update the `mkdocs.yml` file to include the newly added markdown files.
+1. Confirm with a member of the development team that the environment is a good fit for the repo.
+2. Complete a design document outlining the problem formulation and software design. Please have the design doc reviewed by a core development team member before submitting PRs.
+3. New environments must be broken down into small, logical PRs that iteratively add the full logic of the environment. This is to avoid very large PRs that are hard to review and require more re-work if problems are discovered.
+4. Implement all the functions of the `Environment` abstraction: step, reset, specs, etc.
+5. Implement unit tests for every function used by the environment, including a `test_[your_env]__does_not_smoke` using the testing utils.
+6. Add an environment README in the `docs/environments/` folder describing the environment you implemented.
+7. Add an image (or gif) in the readme above. Images are located in `docs/img/`.
+8. Update the documentation api in `docs/api/environments/` to add your environment to the doc.
+9. Update the `mkdocs.yml` file to include the newly added markdown files.
 
 
 ### Coding Style

diff --git a/README.md b/README.md
diff --git a/docs/api/environments/binpack.md → docs/api/environments/bin_pack.md b/docs/api/environments/binpack.md → docs/api/environments/bin_pack.md
@@ -1,4 +1,4 @@
-::: jumanji.environments.combinatorial.binpack.env.BinPack
+::: jumanji.environments.packing.bin_pack.env.BinPack
     selection:
       members:
         - __init__

diff --git a/docs/api/environments/cleaner.md b/docs/api/environments/cleaner.md
@@ -0,0 +1 @@
+::: jumanji.environments.routing.cleaner.env.Cleaner
diff --git a/docs/api/environments/connect4.md b/docs/api/environments/connect4.md
diff --git a/docs/api/environments/connector.md b/docs/api/environments/connector.md
@@ -0,0 +1,11 @@
+::: jumanji.environments.routing.connector.env.Connector
+    selection:
+      members:
+        - __init__
+        - observation_spec
+        - action_spec
+        - reward_spec
+        - discount_spec
+        - reset
+        - step
+        - render
diff --git a/docs/api/environments/cvrp.md b/docs/api/environments/cvrp.md
@@ -1,4 +1,4 @@
-::: jumanji.environments.combinatorial.cvrp.env.CVRP
+::: jumanji.environments.routing.cvrp.env.CVRP
     selection:
       members:
         - __init__

diff --git a/docs/api/environments/game_2048.md b/docs/api/environments/game_2048.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.logic.game_2048.env.Game2048
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/job_shop.md b/docs/api/environments/job_shop.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.packing.job_shop.env.JobShop
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/knapsack.md b/docs/api/environments/knapsack.md
@@ -1,9 +1,8 @@
-::: jumanji.environments.combinatorial.knapsack.env.Knapsack
+::: jumanji.environments.packing.knapsack.env.Knapsack
     selection:
       members:
         - __init__
         - reset
-        - reset_from_state
         - step
         - observation_spec
         - action_spec
diff --git a/docs/api/environments/maze.md b/docs/api/environments/maze.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.routing.maze.env.Maze
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/minesweeper.md b/docs/api/environments/minesweeper.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.logic.minesweeper.env.Minesweeper
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/routing.md b/docs/api/environments/routing.md
diff --git a/docs/api/environments/rubiks_cube.md b/docs/api/environments/rubiks_cube.md
@@ -0,0 +1,8 @@
+::: jumanji.environments.logic.rubiks_cube.env.RubiksCube
+    selection:
+      members:
+        - __init__
+        - reset
+        - step
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/snake.md b/docs/api/environments/snake.md
@@ -1,14 +1,8 @@
-::: jumanji.environments.games.snake.env.Snake
+::: jumanji.environments.routing.snake.env.Snake
     selection:
       members:
         - __init__
-        - observation_spec
-        - action_spec
         - reset
         - step
-        - update_head_pos
-        - render
-        - close
-        - animation
-        - __enter__
-        - __exit__
+        - observation_spec
+        - action_spec
diff --git a/docs/api/environments/tsp.md b/docs/api/environments/tsp.md
@@ -1,9 +1,8 @@
-::: jumanji.environments.combinatorial.tsp.env.TSP
+::: jumanji.environments.routing.tsp.env.TSP
     selection:
       members:
         - __init__
         - reset
-        - reset_from_state
         - step
         - observation_spec
         - action_spec
diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md
@@ -7,7 +7,6 @@
         - VmapWrapper
         - AutoResetWrapper
         - JumanjiToGymWrapper
-        - JumanjiToGymWrapper
         - jumanji_to_gym_obs
       filters:
         - "!^_"

diff --git a/docs/env_anim/bin_pack.gif b/docs/env_anim/bin_pack.gif
diff --git a/docs/env_anim/cleaner.gif b/docs/env_anim/cleaner.gif
diff --git a/docs/env_anim/connector.gif b/docs/env_anim/connector.gif
diff --git a/docs/env_anim/cvrp.gif b/docs/env_anim/cvrp.gif
diff --git a/docs/env_anim/game_2048.gif b/docs/env_anim/game_2048.gif
diff --git a/docs/env_anim/job_shop.gif b/docs/env_anim/job_shop.gif
diff --git a/docs/env_anim/job_shop_annotated.gif b/docs/env_anim/job_shop_annotated.gif
diff --git a/docs/env_anim/knapsack.gif b/docs/env_anim/knapsack.gif
diff --git a/docs/env_anim/maze.gif b/docs/env_anim/maze.gif
diff --git a/docs/env_anim/minesweeper.gif b/docs/env_anim/minesweeper.gif
diff --git a/docs/env_anim/rubiks_cube.gif b/docs/env_anim/rubiks_cube.gif
diff --git a/docs/env_anim/snake.gif b/docs/env_anim/snake.gif
diff --git a/docs/env_anim/tsp.gif b/docs/env_anim/tsp.gif
diff --git a/docs/env_img/connector.png b/docs/env_img/connector.png
diff --git a/docs/env_img/cvrp.png b/docs/env_img/cvrp.png
diff --git a/docs/env_img/game_2048.png b/docs/env_img/game_2048.png
diff --git a/docs/env_img/job_shop.png b/docs/env_img/job_shop.png
diff --git a/docs/env_img/knapsack.png b/docs/env_img/knapsack.png
diff --git a/docs/env_img/maze.png b/docs/env_img/maze.png
diff --git a/docs/env_img/minesweeper.png b/docs/env_img/minesweeper.png
diff --git a/docs/env_img/rubiks_cube.png b/docs/env_img/rubiks_cube.png
diff --git a/docs/env_img/snake.png b/docs/env_img/snake.png
diff --git a/docs/env_img/tsp.png b/docs/env_img/tsp.png
diff --git a/docs/environments/bin_pack.md b/docs/environments/bin_pack.md
@@ -0,0 +1,46 @@
+# BinPack Environment
+<!-- TODO -->
+<p align="center">
+        <img src="../env_anim/bin_pack.gif" width="500"/>
+</p>
+
+We provide here an implementation of the 3D [bin packing problem](https://en.wikipedia.org/wiki/Bin_packing_problem).
+In this problem, the goal of the agent is to efficiently pack a set of boxes (items) of different
+sizes into a single container with as little empty space as possible.
+
+
+## Observation
+The observation given to the agent provides information on the available empty space (called EMSs),
+the items that still need to be packed, and information on what actions are valid at this point.
+The full observation is as follows:
+- `ems`: `EMS` tree of jax arrays (float if `normalize_dimensions` else int32) each of shape
+    `(obs_num_ems,)`, coordinates of all EMSs at the current timestep.
+- `ems_mask`: jax array (bool) of shape `(obs_num_ems,)`, indicates the EMSs that are valid.
+- `items`: `Item` tree of jax arrays (float if `normalize_dimensions` else int32) each of shape
+    `(max_num_items,)`, characteristics of all items for this instance.
+- `items_mask`: jax array (bool) of shape `(max_num_items,)`, indicates the items that are valid.
+- `items_placed`: jax array (bool) of shape `(max_num_items,)`, indicates the items that have been
+    placed so far.
+- `action_mask`: jax array (bool) of shape `(obs_num_ems, max_num_items)`, mask of the joint action
+    space: `True` if the action `[ems_id, item_id]` is valid.
+
+
+## Action
+The action space is a `MultiDiscreteArray` of 2 integer values representing the ID of an EMS
+(space) and the ID of an item. For instance, `[1, 5]` will place item 5 in EMS 1.
+
+
+## Reward
+The reward could be either:
+- **Dense**: normalized volume (relative to the container volume) of the item packed by taking
+    the chosen action. The computed reward is equivalent to the increase in volume utilization
+    of the container due to packing the chosen item. If the action is invalid, the reward is 0.0
+    instead.
+- **Sparse**: computed only at the end of the episode (otherwise, returns 0.0). Returns the volume
+    utilization of the container (between 0.0 and 1.0). If the action is invalid, the action is
+    ignored and the reward is still returned as the current container utilization.
+
+
+## Registered Versions 📖
+- `BinPack-v1`, 3D bin-packing problem with a solvable random generator that generates up to 30
+items, that can handle 100 EMSs and that shows the 70 largest EMSs to the agent.
diff --git a/docs/environments/binpack.md b/docs/environments/binpack.md
diff --git a/docs/environments/cleaner.md b/docs/environments/cleaner.md
@@ -0,0 +1,50 @@
+# Cleaner Environment
+
+<p align="center">
+        <img src="../env_anim/cleaner.gif" height="600"/>
+</p>
+
+We provide here a JAX jit-able implementation of the [Multi-Agent Cleaning](https://github.com/Bigpig4396/Multi-Agent-Reinforcement-Learning-Environment#multi-agent-cleaner)
+environment.
+
+In this environment, multiple agents must cooperatively clean the floor of a room with complex
+indoor barriers (black). At the beginning of an episode, the whole floor is dirty (green).
+Every time an agent (red) visits a dirty tile, it is cleaned (white).
+
+The goal is to clean as many tiles as possible in a given time budget.
+
+A new maze is randomly generated using a recursive division method for each new episode. Agents
+always start in the top left corner of the maze.
+
+
+## Observation
+The **observation** seen by the agent is a `NamedTuple` containing the following:
+- `grid`: jax array (int) of shape `(num_rows, num_cols)`, array representing the grid; each tile is
+    either dirty (0), clean (1), or a wall (2).
+- `agents_locations`: jax array (int) of shape `(num_agents, 2)`, array specifying the x and y
+    coordinates of every agent.
+- `action_mask`: jax array (bool) of shape `(num_agents, 4)`, array specifying, for each agent,
+    which action (up, right, down, left) is legal.
+- `step_count`: jax array (int32) of shape `()`, number of steps elapsed in the current episode.
+
+
+## Action
+The action space is a `MultiDiscreteArray` containing an integer value in `[0, 1, 2, 3]` for each
+agent. Each agent can take one of four actions: up (`0`), right (`1`), down (`2`), or left (`3`).
+
+The episode terminates if any agent meets one of the following conditions:
+
+- An invalid action is taken, or
+- An action is blocked by a wall.
+
+In both cases, the agent's position remains unchanged.
+
+
+## Reward
+The reward is global and shared among the agents. It is equal to the number of tiles which were
+cleaned during the time step, minus a penalty (0.5 by default) to encourage agents to clean the
+maze faster.
+
+
+## Registered Versions 📖
+- `Cleaner-v0`, a room of size 10x10 with 3 agents.
-Original file line number
+Diff line change
@@ Expand Up / @@ -151,3 +151,5 @@ cython_debug/ @@
 3.8/
     jumanji_env/
+    **/outputs/
+    *.xml
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		::: jumanji.environments.routing.cleaner.env.Cleaner