+{%- endblock %}
\ No newline at end of file
diff --git a/docs/api/core.md b/docs/api/core.md
new file mode 100644
index 000000000..c03bf3289
--- /dev/null
+++ b/docs/api/core.md
@@ -0,0 +1,85 @@
+# Core
+
+## gymnasium.Env
+
+```{eval-rst}
+.. autofunction:: gymnasium.Env.step
+```
+
+```{eval-rst}
+.. autofunction:: gymnasium.Env.reset
+```
+
+```{eval-rst}
+.. autofunction:: gymnasium.Env.render
+```
+
+### Attributes
+
+```{eval-rst}
+.. autoattribute:: gymnasium.Env.action_space
+
+ This attribute gives the format of valid actions. It is of datatype `Space` provided by Gymnasium. For example, if the action space is of type `Discrete` and gives the value `Discrete(2)`, this means there are two valid discrete actions: 0 & 1.
+
+ .. code::
+
+ >>> env.action_space
+ Discrete(2)
+ >>> env.observation_space
+ Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
+```
+
+```{eval-rst}
+.. autoattribute:: gymnasium.Env.observation_space
+
+    This attribute gives the format of valid observations. It is of datatype :class:`Space` provided by Gymnasium. For example, if the observation space is of type :class:`Box` and the shape of the object is ``(4,)``, this denotes that a valid observation will be an array of 4 numbers. We can also check the box bounds with the ``high`` and ``low`` attributes.
+
+ .. code::
+
+ >>> env.observation_space.high
+ array([4.8000002e+00, 3.4028235e+38, 4.1887903e-01, 3.4028235e+38], dtype=float32)
+ >>> env.observation_space.low
+ array([-4.8000002e+00, -3.4028235e+38, -4.1887903e-01, -3.4028235e+38], dtype=float32)
+```
+
+```{eval-rst}
+.. autoattribute:: gymnasium.Env.reward_range
+
+    This attribute is a tuple corresponding to the minimum and maximum possible rewards. The default range is ``(-inf, +inf)``. You can set it if you want a narrower range.
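+
+    For example (output shown for illustration; most environments keep the default):
+
+    .. code::
+
+        >>> env.reward_range
+        (-inf, inf)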
+```
+
+### Additional Methods
+
+```{eval-rst}
+.. autofunction:: gymnasium.Env.close
+```
+
+```{eval-rst}
+.. autofunction:: gymnasium.Env.seed
+```
+
+
+## gymnasium.Wrapper
+
+```{eval-rst}
+.. autoclass:: gymnasium.Wrapper
+```
+
+## gymnasium.ObservationWrapper
+
+```{eval-rst}
+.. autoclass:: gymnasium.ObservationWrapper
+```
+
+
+## gymnasium.RewardWrapper
+
+```{eval-rst}
+.. autoclass:: gymnasium.RewardWrapper
+```
+
+## gymnasium.ActionWrapper
+
+```{eval-rst}
+.. autoclass:: gymnasium.ActionWrapper
+```
\ No newline at end of file
diff --git a/docs/api/spaces.md b/docs/api/spaces.md
new file mode 100644
index 000000000..cd332aa2c
--- /dev/null
+++ b/docs/api/spaces.md
@@ -0,0 +1,104 @@
+# Spaces
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Space
+```
+
+## General Functions
+
+Each space implements the following functions:
+
+```{eval-rst}
+.. autofunction:: gymnasium.spaces.Space.sample
+
+.. autofunction:: gymnasium.spaces.Space.contains
+
+.. autoproperty:: gymnasium.spaces.Space.shape
+
+.. property:: gymnasium.spaces.Space.dtype
+
+ Return the data type of this space.
+
+.. autofunction:: gymnasium.spaces.Space.seed
+
+.. autofunction:: gymnasium.spaces.Space.to_jsonable
+
+.. autofunction:: gymnasium.spaces.Space.from_jsonable
+```
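+
+As a quick, illustrative sketch (the space and values below are only examples, not tied to any particular environment):
+
+```python
+import numpy as np
+from gymnasium.spaces import Box
+
+space = Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
+space.seed(42)                   # seed the space's own random number generator
+x = space.sample()               # a random element of the space
+assert space.contains(x)         # equivalently: `x in space`
+print(space.shape, space.dtype)  # (3,) float32
+
+batch = space.to_jsonable([space.sample(), space.sample()])  # JSON-serializable representation
+samples = space.from_jsonable(batch)                         # back to a list of samples
+```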
+
+## Fundamental Spaces
+
+### Box
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Box
+
+ .. automethod:: is_bounded
+ .. automethod:: sample
+```
+
+### Discrete
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Discrete
+
+ .. automethod:: sample
+```
+
+### MultiBinary
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.MultiBinary
+
+ .. automethod:: sample
+```
+
+### MultiDiscrete
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.MultiDiscrete
+
+ .. automethod:: sample
+```
+
+### Text
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Text
+
+ .. automethod:: sample
+```
+
+## Composite Spaces
+
+### Dict
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Dict
+
+ .. automethod:: sample
+```
+
+### Graph
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Graph
+
+ .. automethod:: sample
+```
+
+### Sequence
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Sequence
+
+ .. automethod:: sample
+```
+
+### Tuple
+
+```{eval-rst}
+.. autoclass:: gymnasium.spaces.Tuple
+
+ .. automethod:: sample
+```
diff --git a/docs/api/spaces_utils.md b/docs/api/spaces_utils.md
new file mode 100644
index 000000000..8315a9a54
--- /dev/null
+++ b/docs/api/spaces_utils.md
@@ -0,0 +1,15 @@
+---
+title: Utils
+---
+
+# Spaces Utils
+
+```{eval-rst}
+.. autofunction:: gymnasium.spaces.utils.flatdim
+
+.. autofunction:: gymnasium.spaces.utils.flatten_space
+
+.. autofunction:: gymnasium.spaces.utils.flatten
+
+.. autofunction:: gymnasium.spaces.utils.unflatten
+```
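+
+A small sketch of how these utilities fit together (the composite space below is only an example):
+
+```python
+import numpy as np
+
+from gymnasium.spaces import Box, Dict, Discrete
+from gymnasium.spaces.utils import flatdim, flatten, flatten_space, unflatten
+
+space = Dict({"position": Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32), "gear": Discrete(3)})
+
+print(flatdim(space))        # 5: two Box entries plus a one-hot of size 3 for the Discrete
+print(flatten_space(space))  # a flat Box with shape (5,)
+
+x = space.sample()
+flat_x = flatten(space, x)              # 1-D numpy array of length 5
+x_recovered = unflatten(space, flat_x)  # back to a dictionary observation
+```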
\ No newline at end of file
diff --git a/docs/api/utils.md b/docs/api/utils.md
new file mode 100644
index 000000000..9fd55430e
--- /dev/null
+++ b/docs/api/utils.md
@@ -0,0 +1,58 @@
+---
+title: Utils
+---
+
+# Utils
+
+## Visualization
+
+```{eval-rst}
+.. autoclass:: gymnasium.utils.play.PlayableGame
+
+ .. automethod:: process_event
+
+.. autoclass:: gymnasium.utils.play.PlayPlot
+
+ .. automethod:: callback
+
+.. autofunction:: gymnasium.utils.play.display_arr
+.. autofunction:: gymnasium.utils.play.play
+
+```
+
+## Save Rendering Videos
+
+```{eval-rst}
+.. autofunction:: gymnasium.utils.save_video.capped_cubic_video_schedule
+.. autofunction:: gymnasium.utils.save_video.save_video
+```
+
+## Old to New Step API Compatibility
+
+```{eval-rst}
+.. autofunction:: gymnasium.utils.step_api_compatibility.convert_to_terminated_truncated_step_api
+.. autofunction:: gymnasium.utils.step_api_compatibility.convert_to_done_step_api
+.. autofunction:: gymnasium.utils.step_api_compatibility.step_api_compatibility
+```
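+
+A hedged sketch of how the conversion helpers are typically used; the tuple contents are illustrative, and the conversion is assumed to rely on the `TimeLimit.truncated` info key to tell truncation apart from termination:
+
+```python
+import numpy as np
+
+from gymnasium.utils.step_api_compatibility import convert_to_terminated_truncated_step_api
+
+# An old-style step return: (obs, reward, done, info)
+old_step = (np.zeros(4, dtype=np.float32), 1.0, True, {"TimeLimit.truncated": False})
+
+# New-style return: (obs, reward, terminated, truncated, info)
+obs, reward, terminated, truncated, info = convert_to_terminated_truncated_step_api(old_step)
+```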
+
+## Seeding
+
+```{eval-rst}
+.. autofunction:: gymnasium.utils.seeding.np_random
+```
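+
+A minimal sketch of what this helper returns; inside an environment you would normally rely on `self.np_random` instead of calling it directly:
+
+```python
+from gymnasium.utils.seeding import np_random
+
+rng, seed = np_random(42)    # a np.random.Generator and the seed that was used
+rng.integers(0, 10, size=2)  # use it like any numpy Generator
+```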
+
+## Environment Checking
+
+### Invasive
+
+```{eval-rst}
+.. autofunction:: gymnasium.utils.env_checker.check_env
+.. autofunction:: gymnasium.utils.env_checker.data_equivalence
+.. autofunction:: gymnasium.utils.env_checker.check_reset_seed
+.. autofunction:: gymnasium.utils.env_checker.check_reset_options
+.. autofunction:: gymnasium.utils.env_checker.check_reset_return_info_deprecation
+.. autofunction:: gymnasium.utils.env_checker.check_seed_deprecation
+.. autofunction:: gymnasium.utils.env_checker.check_reset_return_type
+.. autofunction:: gymnasium.utils.env_checker.check_space_limit
+```
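+
+A typical use of the top-level checker (a sketch; the remaining helpers above are mostly called internally by `check_env`):
+
+```python
+import gymnasium
+from gymnasium.utils.env_checker import check_env
+
+env = gymnasium.make("CartPole-v1").unwrapped  # check the bare environment, not the wrappers added by `make`
+check_env(env)  # raises an exception (or warns) if the environment violates the Gymnasium API
+```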
+
diff --git a/docs/api/vector.md b/docs/api/vector.md
new file mode 100644
index 000000000..345957f7c
--- /dev/null
+++ b/docs/api/vector.md
@@ -0,0 +1,87 @@
+---
+title: Vector
+---
+
+# Vector
+
+```{eval-rst}
+.. autofunction:: gymnasium.vector.make
+```
+
+
+## VectorEnv
+
+```{eval-rst}
+.. attribute:: gymnasium.vector.VectorEnv.action_space
+
+ The (batched) action space. The input actions of `step` must be valid elements of `action_space`.::
+
+ >>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+ >>> envs.action_space
+ MultiDiscrete([2 2 2])
+
+.. attribute:: gymnasium.vector.VectorEnv.observation_space
+
+ The (batched) observation space. The observations returned by `reset` and `step` are valid elements of `observation_space`.::
+
+ >>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+ >>> envs.observation_space
+ Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
+
+.. attribute:: gymnasium.vector.VectorEnv.single_action_space
+
+ The action space of an environment copy.::
+
+ >>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+ >>> envs.single_action_space
+ Discrete(2)
+
+.. attribute:: gymnasium.vector.VectorEnv.single_observation_space
+
+ The observation space of an environment copy.::
+
+ >>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+        >>> envs.single_observation_space
+ Box([-4.8 ...], [4.8 ...], (4,), float32)
+```
+
+
+
+### Reset
+
+```{eval-rst}
+.. automethod:: gymnasium.vector.VectorEnv.reset
+```
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.reset()
+(array([[-0.02240574, -0.03439831, -0.03904812, 0.02810693],
+ [ 0.01586068, 0.01929009, 0.02394426, 0.04016077],
+ [-0.01314174, 0.03893502, -0.02400815, 0.0038326 ]],
+ dtype=float32), {})
+```
+### Step
+
+```{eval-rst}
+.. automethod:: gymnasium.vector.VectorEnv.step
+```
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.reset()
+>>> actions = np.array([1, 0, 1])
+>>> observations, rewards, dones, infos = envs.step(actions)
+
+>>> observations
+array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
+ [ 0.00788269, -0.17490888, 0.03393489, 0.31735462],
+ [ 0.04918966, 0.19421194, 0.02938497, -0.29495203]],
+ dtype=float32)
+>>> rewards
+array([1., 1., 1.])
+>>> dones
+array([False, False, False])
+>>> infos
+{}
+```
diff --git a/docs/api/wrappers.md b/docs/api/wrappers.md
new file mode 100644
index 000000000..bc6f7c427
--- /dev/null
+++ b/docs/api/wrappers.md
@@ -0,0 +1,249 @@
+---
+title: Wrappers
+lastpage:
+---
+
+# Wrappers
+
+Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly.
+Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can
+also be chained to combine their effects. Most environments that are generated via `gymnasium.make` will already be wrapped by default.
+
+In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
+with (possibly optional) parameters to the wrapper's constructor:
+```python
+>>> import gymnasium
+>>> from gymnasium.wrappers import RescaleAction
+>>> base_env = gymnasium.make("BipedalWalker-v3")
+>>> base_env.action_space
+Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)
+>>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
+>>> wrapped_env.action_space
+Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32)
+```
+You can access the environment underneath the **first** wrapper by using
+the `.env` attribute:
+
+```python
+>>> wrapped_env
+<RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>>
+>>> wrapped_env.env
+<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>
+```
+
+If you want to get to the environment underneath **all** of the layers of wrappers,
+you can use the `.unwrapped` attribute.
+If the environment is already a bare environment, the `.unwrapped` attribute will just return itself.
+
+```python
+>>> wrapped_env
+<RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>>
+>>> wrapped_env.unwrapped
+<gymnasium.envs.box2d.bipedal_walker.BipedalWalker object at 0x...>
+```
+
+There are three common things you might want a wrapper to do:
+
+- Transform actions before applying them to the base environment
+- Transform observations that are returned by the base environment
+- Transform rewards that are returned by the base environment
+
+Such wrappers can be easily implemented by inheriting from `ActionWrapper`, `ObservationWrapper`, or `RewardWrapper` and implementing the
+respective transformation. If you need a wrapper to do more complicated tasks, you can inherit from the `Wrapper` class directly.
+The code that is presented in the following sections can also be found in
+the [gymnasium-examples](https://github.com/Farama-Foundation/gymnasium-examples) repository.
+
+## ActionWrapper
+If you would like to apply a function to the action before passing it to the base environment,
+you can simply inherit from `ActionWrapper` and overwrite the method `action` to implement that transformation.
+The transformation defined in that method must take values in the base environment's action space.
+However, its domain might differ from the original action space. In that case, you need to specify the new
+action space of the wrapper by setting `self.action_space` in the `__init__` method of your wrapper.
+
+Let's say you have an environment with an action space of type `Box`, but you would
+only like to use a finite subset of actions. Then, you might want to implement the following wrapper:
+
+```python
+import gymnasium
+import numpy as np
+from gymnasium.spaces import Discrete
+
+
+class DiscreteActions(gymnasium.ActionWrapper):
+ def __init__(self, env, disc_to_cont):
+ super().__init__(env)
+ self.disc_to_cont = disc_to_cont
+ self.action_space = Discrete(len(disc_to_cont))
+
+ def action(self, act):
+ return self.disc_to_cont[act]
+
+if __name__ == "__main__":
+ env = gymnasium.make("LunarLanderContinuous-v2")
+ wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]),
+ np.array([0,1]), np.array([0,-1])])
+    print(wrapped_env.action_space)  # Discrete(4)
+```
+
+Among others, Gymnasium provides the action wrappers `ClipAction` and `RescaleAction`.
+
+## ObservationWrapper
+If you would like to apply a function to the observation that is returned by the base environment before passing
+it to learning code, you can simply inherit from `ObservationWrapper` and overwrite the method `observation` to
+implement that transformation. The transformation defined in that method must be defined on the base environment's
+observation space. However, it may take values in a different space. In that case, you need to specify the new
+observation space of the wrapper by setting `self.observation_space` in the `__init__` method of your wrapper.
+
+For example, you might have a 2D navigation task where the environment returns dictionaries as observations with keys `"agent"`
+and `"target"`. A common thing to do might be to throw away some degrees of freedom and only consider
+the position of the target relative to the agent, i.e. `observation["target"] - observation["agent"]`.
+For this, you could implement an observation wrapper like this:
+
+```python
+import numpy as np
+
+import gymnasium
+from gymnasium.spaces import Box
+
+
+class RelativePosition(gymnasium.ObservationWrapper):
+ def __init__(self, env):
+ super().__init__(env)
+ self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)
+
+ def observation(self, obs):
+ return obs["target"] - obs["agent"]
+```
+
+Among others, Gymnasium provides the observation wrapper `TimeAwareObservation`, which adds information about the index of the timestep
+to the observation.
+
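+For instance (a sketch; the exact observation layout is determined by the wrapper):
+
+```python
+import gymnasium
+from gymnasium.wrappers import TimeAwareObservation
+
+env = TimeAwareObservation(gymnasium.make("CartPole-v1"))
+obs, info = env.reset()
+# `obs` now carries the 4 CartPole values plus the current time step appended at the end
+```
+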
+## RewardWrapper
+If you would like to apply a function to the reward that is returned by the base environment before passing
+it to learning code, you can simply inherit from `RewardWrapper` and overwrite the method `reward` to
+implement that transformation. This transformation might change the reward range; to specify the reward range of
+your wrapper, you can simply define `self.reward_range` in `__init__`.
+
+Let us look at an example: Sometimes (especially when we do not have control over the reward because it is intrinsic), we want to clip the reward
+to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper:
+
+```python
+import numpy as np
+
+import gymnasium
+
+
+class ClipReward(gymnasium.RewardWrapper):
+ def __init__(self, env, min_reward, max_reward):
+ super().__init__(env)
+ self.min_reward = min_reward
+ self.max_reward = max_reward
+ self.reward_range = (min_reward, max_reward)
+
+ def reward(self, reward):
+ return np.clip(reward, self.min_reward, self.max_reward)
+```
+
+## AutoResetWrapper
+
+Some users may want a wrapper which will automatically reset its wrapped environment when that environment reaches the done state. An advantage of this wrapper is that it will never produce undefined behavior, as standard Gymnasium environments do when stepping beyond the done state.
+
+When calling step causes `self.env.step()` to return `done=True`,
+`self.env.reset()` is called,
+and the return format of `self.step()` is as follows:
+
+```python
+new_obs, terminal_reward, terminal_done, info
+```
+
+`new_obs` is the first observation after calling `self.env.reset()`.
+
+`terminal_reward` is the reward after calling `self.env.step()`,
+prior to calling `self.env.reset()`.
+
+`terminal_done` is always `True`.
+
+`info` is a dict containing all the keys from the info dict returned by
+the call to `self.env.reset()`, with additional keys `terminal_observation`
+containing the observation returned by the last call to `self.env.step()`
+and `terminal_info` containing the info dict returned by the last call
+to `self.env.step()`.
+
+If `self.env.step()` returns `done=False`, `self.step()` returns
+
+```python
+obs, reward, done, info
+```
+as normal.
+
+
+The AutoResetWrapper is not applied by default when calling `gymnasium.make()`, but can be applied by setting the optional `autoreset` argument to `True`:
+
+```python
+env = gymnasium.make("CartPole-v1", autoreset=True)
+```
+
+The AutoResetWrapper can also be applied using its constructor:
+```python
+from gymnasium.wrappers import AutoResetWrapper
+
+env = gymnasium.make("CartPole-v1")
+env = AutoResetWrapper(env)
+```
+
+
+```{note}
+When using the AutoResetWrapper to collect rollouts, note
+that when `self.env.step()` returns `done`, a
+new observation from after calling `self.env.reset()` is returned
+by `self.step()` alongside the terminal reward and done state from the
+previous episode. If you need the terminal state from the previous
+episode, you need to retrieve it via the `terminal_observation` key
+in the info dict. Make sure you know what you're doing if you
+use this wrapper!
+```
+
+
+## General Wrappers
+
+Sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the
+reward based on data in `info` or change the rendering behavior).
+Such wrappers can be implemented by inheriting from `Wrapper`.
+
+- You can set a new action or observation space by defining `self.action_space` or `self.observation_space` in `__init__`, respectively
+- You can set new metadata and reward range by defining `self.metadata` and `self.reward_range` in `__init__`, respectively
+- You can override `step`, `render`, `close` etc. If you do this, you can access the environment that was passed
+to your wrapper (which *still* might be wrapped in some other wrapper) by accessing the attribute `self.env`.
+
+Let's also take a look at an example for this case. Most MuJoCo environments return a reward that consists
+of different terms: For instance, there might be a term that rewards the agent for completing the task and one term that
+penalizes large actions (i.e. energy usage). Usually, you can pass weight parameters for those terms during
+initialization of the environment. However, *Reacher* does not allow you to do this! Nevertheless, all individual terms
+of the reward are returned in `info`, so let us build a wrapper for Reacher that allows us to weight those terms:
+
+```python
+class ReacherRewardWrapper(gymnasium.Wrapper):
+ def __init__(self, env, reward_dist_weight, reward_ctrl_weight):
+ super().__init__(env)
+ self.reward_dist_weight = reward_dist_weight
+ self.reward_ctrl_weight = reward_ctrl_weight
+
+ def step(self, action):
+ obs, _, terminated, truncated, info = self.env.step(action)
+ reward = (
+ self.reward_dist_weight * info["reward_dist"]
+ + self.reward_ctrl_weight * info["reward_ctrl"]
+ )
+ return obs, reward, terminated, truncated, info
+```
+
+```{note}
+It is *not* sufficient to use a `RewardWrapper` in this case!
+```
+
+## Available Wrappers
+
+| Name | Type | Arguments | Description |
+|---------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `AtariPreprocessing` | `gymnasium.Wrapper` | `env: gymnasium.Env`, `noop_max: int = 30`, `frame_skip: int = 4`, `screen_size: int = 84`, `terminal_on_life_loss: bool = False`, `grayscale_obs: bool = True`, `grayscale_newaxis: bool = False`, `scale_obs: bool = False` | Implements the best practices from Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents" but will be deprecated soon. |
+| `AutoResetWrapper` | `gymnasium.Wrapper` | `env` | The wrapped environment will automatically reset when the done state is reached. Make sure you read the documentation before using this wrapper!|
+| `ClipAction` | `gymnasium.ActionWrapper` | `env` | Clip the continuous action to the valid bound specified by the environment's `action_space` |
+| `FilterObservation` | `gymnasium.ObservationWrapper` | `env`, `filter_keys=None` | If you have an environment that returns dictionaries as observations, but you would like to only keep a subset of the entries, you can use this wrapper. `filter_keys` should be an iterable that contains the keys that are kept in the new observation. If it is `None`, all keys will be kept and the wrapper has no effect. |
+| `FlattenObservation` | `gymnasium.ObservationWrapper` | `env` | Observation wrapper that flattens the observation |
+| `FrameStack` | `gymnasium.ObservationWrapper` | `env`, `num_stack`, `lz4_compress=False` | Observation wrapper that stacks the observations in a rolling manner. For example, if the number of stacks is 4, then the returned observation contains the most recent 4 observations. Observations will be objects of type `LazyFrames`. This object can be cast to a numpy array via `np.asarray(obs)`. You can also access single frames or slices via the usual `__getitem__` syntax. If `lz4_compress` is set to true, the `LazyFrames` object will compress the frames internally (losslessly). The first observation (i.e. the one returned by `reset`) will consist of `num_stack` repetitions of the first frame. |
+| `GrayScaleObservation` | `gymnasium.ObservationWrapper` | `env`, `keep_dim=False` | Convert the image observation from RGB to gray scale. By default, the resulting observation will be 2-dimensional. If `keep_dim` is set to true, a singleton dimension will be added (i.e. the observations are of shape AxBx1). |
+| `NormalizeReward` | `gymnasium.Wrapper` | `env`, `gamma=0.99`, `epsilon=1e-8` | This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. `epsilon` is a stability parameter and `gamma` is the discount factor that is used in the exponential moving average. The exponential moving average will have variance `(1 - gamma)**2`. The scaling depends on past trajectories and rewards will not be scaled correctly if the wrapper was newly instantiated or the policy was changed recently. |
+| `NormalizeObservation` | `gymnasium.Wrapper` | `env`, `epsilon=1e-8` | This wrapper will normalize observations s.t. each coordinate is centered with unit variance. The normalization depends on past trajectories and observations will not be normalized correctly if the wrapper was newly instantiated or the policy was changed recently. `epsilon` is a stability parameter that is used when scaling the observations. |
+| `OrderEnforcing` | `gymnasium.Wrapper` | `env` | This will produce an error if `step` is called before an initial `reset` |
+| `PixelObservationWrapper` | `gymnasium.ObservationWrapper` | `env`, `pixels_only=True`, `render_kwargs=None`, `pixel_keys=("pixels",)` | Augment observations by pixel values obtained via `render`. You can specify whether the original observations should be discarded entirely or be augmented by setting `pixels_only`. Also, you can provide keyword arguments for `render`. |
+| `RecordEpisodeStatistics` | `gymnasium.Wrapper` | `env`, `deque_size=100` | This will keep track of cumulative rewards and episode lengths. At the end of an episode, the statistics of the episode will be added to `info`. Moreover, the rewards and episode lengths are stored in buffers that can be accessed via `wrapped_env.return_queue` and `wrapped_env.length_queue` respectively. The size of these buffers can be set via `deque_size`. |
+| `RecordVideo` | `gymnasium.Wrapper` | `env`, `video_folder: str`, `episode_trigger: Callable[[int], bool] = None`, `step_trigger: Callable[[int], bool] = None`, `video_length: int = 0`, `name_prefix: str = "rl-video"` | This wrapper will record videos of rollouts. The results will be saved in the folder specified via `video_folder`. You can specify a prefix for the filenames via `name_prefix`. Usually, you only want to record the environment intermittently, say every hundredth episode. To allow this, you can pass `episode_trigger` or `step_trigger`. At most one of these should be passed. These functions will accept an episode index or step index, respectively. They should return a boolean that indicates whether a recording should be started at this point. If neither `episode_trigger` nor `step_trigger` is passed, a default `episode_trigger` will be used. By default, the recording will be stopped once a done signal has been emitted by the environment. However, you can also create recordings of fixed length (possibly spanning several episodes) by passing a strictly positive value for `video_length`. |
+| `RescaleAction` | `gymnasium.ActionWrapper` | `env`, `min_action`, `max_action` | Rescales the continuous action space of the environment to a range \[`min_action`, `max_action`], where `min_action` and `max_action` are numpy arrays or floats. |
+| `ResizeObservation` | `gymnasium.ObservationWrapper` | `env`, `shape` | This wrapper works on environments with image observations (or more generally observations of shape AxBxC) and resizes the observation to the shape given by the tuple `shape`. The argument `shape` may also be an integer. In that case, the observation is scaled to a square of side length `shape`. |
+| `TimeAwareObservation` | `gymnasium.ObservationWrapper` | `env` | Augment the observation with current time step in the trajectory (by appending it to the observation). This can be useful to ensure that things stay Markov. Currently it only works with one-dimensional observation spaces. |
+| `TimeLimit` | `gymnasium.Wrapper` | `env`, `max_episode_steps=None` | Probably the most useful wrapper in Gymnasium. This wrapper will emit a done signal if the specified number of steps is exceeded in an episode. In order to be able to distinguish termination and truncation, you need to check `info`. If it does not contain the key `"TimeLimit.truncated"`, the environment did not reach the timelimit. Otherwise, `info["TimeLimit.truncated"]` will be true if the episode was terminated because of the time limit. |
+| `TransformObservation` | `gymnasium.ObservationWrapper` | `env`, `f` | This wrapper will apply `f` to observations |
+| `TransformReward` | `gymnasium.RewardWrapper` | `env`, `f` | This wrapper will apply `f` to rewards |
+| `VectorListInfo` | `gymnasium.Wrapper` | `env` | This wrapper will convert the info of a vectorized environment from the `dict` format to a `list` of dictionaries where the _i-th_ dictionary contains the info of the _i-th_ environment. If using other wrappers that perform operations on info, like `RecordEpisodeStatistics`, this needs to be the outermost wrapper. |
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..29b0c61ec
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,75 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+import gymnasium
+
+project = "Gymnasium"
+copyright = "2022, Farama Foundation"
+author = "Farama Foundation"
+
+# The full version, including alpha/beta/rc tags
+release = gymnasium.__version__
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ "sphinx.ext.napoleon",
+ "sphinx.ext.doctest",
+ "sphinx.ext.autodoc",
+ "sphinx.ext.githubpages",
+ "myst_parser",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# Napoleon settings
+napoleon_use_ivar = True
+napoleon_use_admonition_for_references = True
+# See https://github.com/sphinx-doc/sphinx/issues/9119
+napoleon_custom_sections = [("Returns", "params_style")]
+
+# Autodoc
+autoclass_content = "both"
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "furo"
+html_title = "Gymnasium Documentation"
+html_baseurl = "https://gymnasium.farama.org"
+html_copy_source = False
+html_favicon = "_static/img/favicon.png"
+html_theme_options = {
+ "light_logo": "img/gymnasium_black.svg",
+ "dark_logo": "img/gymnasium_white.svg",
+}
+html_static_path = ["_static"]
+html_css_files = [
+ "css/custom.css",
+]
diff --git a/docs/content/basic_usage.md b/docs/content/basic_usage.md
new file mode 100644
index 000000000..2eb5762f0
--- /dev/null
+++ b/docs/content/basic_usage.md
@@ -0,0 +1,219 @@
+---
+layout: "contents"
+title: API
+firstpage:
+---
+
+# Basic Usage
+
+## Initializing Environments
+Initializing environments is very easy in Gymnasium and can be done via:
+
+```python
+import gymnasium
+env = gymnasium.make('CartPole-v0')
+```
+
+## Interacting with the Environment
+Gymnasium implements the classic "agent-environment loop":
+
+```{image} /_static/diagrams/AE_loop.png
+:width: 50%
+:align: center
+:class: only-light
+```
+
+```{image} /_static/diagrams/AE_loop_dark.png
+:width: 50%
+:align: center
+:class: only-dark
+```
+
+The agent performs some actions in the environment (usually by passing some control inputs to the environment, e.g. torque inputs of motors) and observes
+how the environment's state changes. One such action-observation exchange is referred to as a *timestep*.
+
+The goal in RL is to manipulate the environment in some specific way. For instance, we want the agent to navigate a robot
+to a specific point in space. If it succeeds in doing this (or makes some progress towards that goal), it will receive a positive reward
+alongside the observation for this timestep. The reward may also be negative or 0, if the agent did not yet succeed (or did not make any progress).
+The agent will then be trained to maximize the reward it accumulates over many timesteps.
+
+After some timesteps, the environment may enter a terminal state. For instance, the robot may have crashed! In that case,
+we want to reset the environment to a new initial state. The environment issues a done signal to the agent if it enters such a terminal state.
+Not all done signals must be triggered by a "catastrophic failure": Sometimes we also want to issue a done signal after
+a fixed number of timesteps, or if the agent has succeeded in completing some task in the environment.
+
+Let's see what the agent-environment loop looks like in Gymnasium.
+This example will run an instance of `LunarLander-v2` environment for 1000 timesteps. Since we pass `render_mode="human"`, you should see a window pop up rendering the environment.
+
+```python
+import gymnasium
+env = gymnasium.make("LunarLander-v2", render_mode="human")
+env.action_space.seed(42)
+
+observation, info = env.reset(seed=42)
+
+for _ in range(1000):
+ observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
+
+ if terminated or truncated:
+ observation, info = env.reset()
+
+env.close()
+```
+
+The output should look something like this
+
+```{figure} https://user-images.githubusercontent.com/15806078/153222406-af5ce6f0-4696-4a24-a683-46ad4939170c.gif
+:width: 50%
+:align: center
+```
+
+Every environment specifies the format of valid actions by providing an `env.action_space` attribute. Similarly,
+the format of valid observations is specified by `env.observation_space`.
+In the example above we sampled random actions via `env.action_space.sample()`. Note that we need to seed the action space separately from the
+environment to ensure reproducible samples.
+
+## Checking API-Conformity
+If you have implemented a custom environment and would like to perform a sanity check to make sure that it conforms to
+the API, you can run:
+
+```python
+>>> from gymnasium.utils.env_checker import check_env
+>>> check_env(env)
+```
+
+This function will throw an exception if it seems like your environment does not follow the Gymnasium API. It will also produce
+warnings if it looks like you made a mistake or do not follow a best practice (e.g. if `observation_space` looks like
+an image but does not have the right dtype). Warnings can be turned off by passing `warn=False`. By default, `check_env` will
+not check the `render` method. To change this behavior, you can pass `skip_render_check=False`.
+
+> After running `check_env` on an environment, you should not reuse the instance that was checked, as it may have already
+been closed!
+
+## Spaces
+Spaces are usually used to specify the format of valid actions and observations.
+Every environment should have the attributes `action_space` and `observation_space`, both of which should be instances
+of classes that inherit from `Space`.
+There are multiple `Space` types available in Gymnasium:
+
+- `Box`: describes an n-dimensional continuous space. It's a bounded space where we can define the upper and lower limits which describe the valid values our observations can take.
+- `Discrete`: describes a discrete space where {0, 1, ..., n-1} are the possible values our observation or action can take. Values can be shifted to {a, a+1, ..., a+n-1} using an optional argument.
+- `Dict`: represents a dictionary of simple spaces.
+- `Tuple`: represents a tuple of simple spaces.
+- `MultiBinary`: creates an n-shape binary space. The argument `n` can be a number or a `list` of numbers.
+- `MultiDiscrete`: consists of a series of `Discrete` action spaces with a different number of actions in each element.
+
+```python
+>>> import numpy as np
+>>> from gymnasium.spaces import Box, Discrete, Dict, Tuple, MultiBinary, MultiDiscrete
+>>>
+>>> observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32)
+>>> observation_space.sample()
+[ 1.6952509 -0.4399011 -0.7981693]
+>>>
+>>> observation_space = Discrete(4)
+>>> observation_space.sample()
+1
+>>>
+>>> observation_space = Discrete(5, start=-2)
+>>> observation_space.sample()
+-2
+>>>
+>>> observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)})
+>>> observation_space.sample()
+OrderedDict([('position', 0), ('velocity', 1)])
+>>>
+>>> observation_space = Tuple((Discrete(2), Discrete(3)))
+>>> observation_space.sample()
+(1, 2)
+>>>
+>>> observation_space = MultiBinary(5)
+>>> observation_space.sample()
+[1 1 1 0 1]
+>>>
+>>> observation_space = MultiDiscrete([ 5, 2, 2 ])
+>>> observation_space.sample()
+[3 0 0]
+```
+
+## Wrappers
+Wrappers are a convenient way to modify an existing environment without having to alter the underlying code directly.
+Using wrappers will allow you to avoid a lot of boilerplate code and make your environment more modular. Wrappers can
+also be chained to combine their effects. Most environments that are generated via `gymnasium.make` will already be wrapped by default.
+
+In order to wrap an environment, you must first initialize a base environment. Then you can pass this environment along
+with (possibly optional) parameters to the wrapper's constructor:
+```python
+>>> import gymnasium
+>>> from gymnasium.wrappers import RescaleAction
+>>> base_env = gymnasium.make("BipedalWalker-v3")
+>>> base_env.action_space
+Box([-1. -1. -1. -1.], [1. 1. 1. 1.], (4,), float32)
+>>> wrapped_env = RescaleAction(base_env, min_action=0, max_action=1)
+>>> wrapped_env.action_space
+Box([0. 0. 0. 0.], [1. 1. 1. 1.], (4,), float32)
+```
+
+
+There are three very common things you might want a wrapper to do:
+
+- Transform actions before applying them to the base environment
+- Transform observations that are returned by the base environment
+- Transform rewards that are returned by the base environment
+
+Such wrappers can be easily implemented by inheriting from `ActionWrapper`, `ObservationWrapper`, or `RewardWrapper` and implementing the
+respective transformation.
+
+However, sometimes you might need to implement a wrapper that does some more complicated modifications (e.g. modify the
+reward based on data in `info`). Such wrappers
+can be implemented by inheriting from `Wrapper`.
+Gymnasium already provides many commonly used wrappers for you. Some examples:
+
+- `TimeLimit`: Issue a done signal if a maximum number of timesteps has been exceeded (or the base environment has issued a done signal).
+- `ClipAction`: Clip the action such that it lies in the action space (of type `Box`).
+- `RescaleAction`: Rescale actions to lie in a specified interval
+- `TimeAwareObservation`: Add information about the index of timestep to observation. In some cases helpful to ensure that transitions are Markov.
+
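+Because wrappers take an environment and return an environment, they can be chained. A quick sketch combining two of the wrappers above:
+
+```python
+import gymnasium
+from gymnasium.wrappers import ClipAction, TimeAwareObservation
+
+env = gymnasium.make("BipedalWalker-v3")
+env = ClipAction(env)            # actions are clipped into the Box action space
+env = TimeAwareObservation(env)  # the time step index is appended to each observation
+```
+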
+If you have a wrapped environment, and you want to get the unwrapped environment underneath all of the layers of wrappers (so that you can manually call a function or change some underlying aspect of the environment), you can use the `.unwrapped` attribute. If the environment is already a base environment, the `.unwrapped` attribute will just return itself.
+
+```python
+>>> wrapped_env
+<RescaleAction<TimeLimit<OrderEnforcing<BipedalWalker<BipedalWalker-v3>>>>>
+>>> wrapped_env.unwrapped
+<gymnasium.envs.box2d.bipedal_walker.BipedalWalker object at 0x...>
+```
+
+## Playing within an environment
+You can also play the environment using your keyboard using the `play` function in `gymnasium.utils.play`.
+```python
+import gymnasium
+from gymnasium.utils.play import play
+
+play(gymnasium.make('Pong-v0'))
+```
+This opens a window of the environment and allows you to control the agent using your keyboard.
+
+Playing using the keyboard requires a key-action map. This map should have type `dict[tuple[int], int | None]`, which maps the tuple of keys pressed to the action performed.
+For example, if pressing the keys `w` and `space` at the same time is supposed to perform action `2`, then the `keys_to_action` dict should look like:
+```python
+{
+ # ...
+ (ord('w'), ord(' ')): 2,
+ # ...
+}
+```
+As a more complete example, let's say we wish to play with `CartPole-v0` using our left and right arrow keys. The code would be as follows:
+```python
+import gymnasium
+import pygame
+from gymnasium.utils.play import play
+mapping = {(pygame.K_LEFT,): 0, (pygame.K_RIGHT,): 1}
+play(gymnasium.make("CartPole-v0"), keys_to_action=mapping)
+```
+where we obtain the corresponding key ID constants from pygame. If the `keys_to_action` argument is not specified, then the default `keys_to_action` mapping for that environment is used, if provided.
+
+Furthermore, if you wish to plot real-time statistics as you play, you can use `gymnasium.utils.play.PlayPlot`. Here's some sample code for plotting the reward for the last 5 seconds of gameplay:
+```python
+import gymnasium
+from gymnasium.utils.play import PlayPlot, play
+
+
+def callback(obs_t, obs_tp1, action, rew, done, info):
+ return [rew,]
+plotter = PlayPlot(callback, 30 * 5, ["reward"])
+env = gymnasium.make("Pong-v0")
+play(env, callback=plotter.callback)
+```
diff --git a/docs/content/environment_creation.md b/docs/content/environment_creation.md
new file mode 100644
index 000000000..9f05cd96e
--- /dev/null
+++ b/docs/content/environment_creation.md
@@ -0,0 +1,403 @@
+---
+layout: "contents"
+title: Environment Creation
+---
+# Make your own custom environment
+
+This documentation overviews creating new environments, along with the relevant wrappers, utilities, and tests that Gymnasium provides to support building them.
+You can clone gym-examples to play with the code that is presented here. We recommend that you use a virtual environment:
+
+```console
+git clone https://github.com/Farama-Foundation/gym-examples
+cd gym-examples
+python -m venv .env
+source .env/bin/activate
+pip install -e .
+```
+
+## Subclassing gymnasium.Env
+
+Before learning how to create your own environment you should check out [the documentation of Gymnasium's API](https://gymnasium.farama.org/content/api/).
+
+We will be concerned with a subset of gym-examples that looks like this:
+
+```sh
+gym-examples/
+ README.md
+ setup.py
+ gym_examples/
+ __init__.py
+ envs/
+ __init__.py
+ grid_world.py
+ wrappers/
+ __init__.py
+ relative_position.py
+```
+
+To illustrate the process of subclassing `gymnasium.Env`, we will implement a very simplistic game, called `GridWorldEnv`.
+We will write the code for our custom environment in `gym-examples/gym_examples/envs/grid_world.py`.
+The environment consists of a 2-dimensional square grid of fixed size (specified via the `size` parameter during construction).
+The agent can move vertically or horizontally between grid cells in each timestep. The goal of the agent is to navigate to a
+target on the grid that has been placed randomly at the beginning of the episode.
+
+- Observations provide the location of the target and agent.
+- There are 4 actions in our environment, corresponding to the movements "right", "up", "left", and "down".
+- A done signal is issued as soon as the agent has navigated to the grid cell where the target is located.
+- Rewards are binary and sparse, meaning that the immediate reward is always zero, unless the agent has reached the target, then it is 1.
+
+An episode in this environment (with `size=5`) might look like this:
+
+
+
+where the blue dot is the agent and the red square represents the target.
+
+
+Let us look at the source code of `GridWorldEnv` piece by piece:
+
+### Declaration and Initialization
+Our custom environment will inherit from the abstract class `gymnasium.Env`. You shouldn't forget to add the `metadata` attribute to your class.
+There, you should specify the render-modes that are supported by your environment (e.g. `"human"`, `"rgb_array"`, `"ansi"`)
+and the framerate at which your environment should be rendered. Every environment should support `None` as render-mode; you don't need to add it to the metadata.
+In `GridWorldEnv`, we will support the modes "rgb_array" and "human" and render at 4 FPS.
+
+The `__init__` method of our environment will accept the integer `size`, which determines the size of the square grid.
+We will set up some variables for rendering and define `self.observation_space` and `self.action_space`.
+In our case, observations should provide information about the location of the agent and target on the 2-dimensional grid.
+We will choose to represent observations in the form of dictionaries with keys `"agent"` and `"target"`. An observation
+may look like `{"agent": array([1, 0]), "target": array([0, 3])}`.
+Since we have 4 actions in our environment ("right", "up", "left", "down"), we will use `Discrete(4)` as an action space.
+Here is the declaration of `GridWorldEnv` and the implementation of `__init__`:
+```python
+import gymnasium
+from gymnasium import spaces
+import pygame
+import numpy as np
+
+
+class GridWorldEnv(gymnasium.Env):
+ metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
+
+ def __init__(self, render_mode=None, size=5):
+ self.size = size # The size of the square grid
+ self.window_size = 512 # The size of the PyGame window
+
+ # Observations are dictionaries with the agent's and the target's location.
+        # Each location is encoded as an element of {0, ..., `size`-1}^2, i.e. MultiDiscrete([size, size]).
+ self.observation_space = spaces.Dict(
+ {
+ "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+ "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+ }
+ )
+
+ # We have 4 actions, corresponding to "right", "up", "left", "down"
+ self.action_space = spaces.Discrete(4)
+
+ """
+ The following dictionary maps abstract actions from `self.action_space` to
+ the direction we will walk in if that action is taken.
+ I.e. 0 corresponds to "right", 1 to "up" etc.
+ """
+ self._action_to_direction = {
+ 0: np.array([1, 0]),
+ 1: np.array([0, 1]),
+ 2: np.array([-1, 0]),
+ 3: np.array([0, -1]),
+ }
+
+ assert render_mode is None or render_mode in self.metadata["render_modes"]
+ self.render_mode = render_mode
+
+ """
+ If human-rendering is used, `self.window` will be a reference
+ to the window that we draw to. `self.clock` will be a clock that is used
+ to ensure that the environment is rendered at the correct framerate in
+ human-mode. They will remain `None` until human-mode is used for the
+ first time.
+ """
+ self.window = None
+ self.clock = None
+
+```
+
+### Constructing Observations From Environment States
+Since we will need to compute observations both in `reset` and `step`, it is often convenient to have
+a (private) method `_get_obs` that translates the environment's state into an observation. However, this is not mandatory
+and you may as well compute observations in `reset` and `step` separately:
+```python
+ def _get_obs(self):
+ return {"agent": self._agent_location, "target": self._target_location}
+```
+We can also implement a similar method for the auxiliary information that is returned by `step` and `reset`. In our case,
+we would like to provide the Manhattan distance between the agent and the target:
+```python
+ def _get_info(self):
+ return {"distance": np.linalg.norm(self._agent_location - self._target_location, ord=1)}
+```
+Oftentimes, info will also contain some data that is only available inside the `step` method (e.g. individual reward
+terms). In that case, we would have to update the dictionary that is returned by `_get_info` in `step`.
+
+### Reset
+The `reset` method will be called to initiate a new episode. You may assume that the `step` method will not
+be called before `reset` has been called. Moreover, `reset` should be called whenever a done signal has been issued.
+Users may pass the `seed` keyword to `reset` to initialize any random number generator that is used by the environment
+to a deterministic state. It is recommended to use the random number generator `self.np_random` that is provided by the environment's
+base class, `gymnasium.Env`. If you only use this RNG, you do not need to worry much about seeding, *but you need to remember to
+call `super().reset(seed=seed)`* to make sure that `gymnasium.Env` correctly seeds the RNG.
+Once this is done, we can randomly set the state of our environment.
+In our case, we randomly choose the agent's location and then randomly sample the target's location until it does not coincide with the agent's position.
+
+The `reset` method should return a tuple of the initial observation
+and some auxiliary information. We can use the methods `_get_obs`
+and `_get_info` that we implemented earlier for that:
+
+```python
+ def reset(self, seed=None, options=None):
+ # We need the following line to seed self.np_random
+ super().reset(seed=seed)
+
+ # Choose the agent's location uniformly at random
+ self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)
+
+ # We will sample the target's location randomly until it does not coincide with the agent's location
+ self._target_location = self._agent_location
+ while np.array_equal(self._target_location, self._agent_location):
+ self._target_location = self.np_random.integers(
+ 0, self.size, size=2, dtype=int
+ )
+
+ observation = self._get_obs()
+ info = self._get_info()
+
+ if self.render_mode == "human":
+ self._render_frame()
+
+ return observation, info
+```
+
+### Step
+The `step` method usually contains most of the logic of your environment. It accepts an `action`, computes the state of
+the environment after applying that action and returns the 5-tuple `(observation, reward, terminated, truncated, info)`.
+Once the new state of the environment has been computed, we can check whether it is a terminal state and we set `terminated`
+accordingly. Since we are using sparse binary rewards in `GridWorldEnv`, computing `reward` is trivial once we know `terminated`. To gather
+`observation` and `info`, we can again make use of `_get_obs` and `_get_info`:
+
+```python
+ def step(self, action):
+ # Map the action (element of {0,1,2,3}) to the direction we walk in
+ direction = self._action_to_direction[action]
+ # We use `np.clip` to make sure we don't leave the grid
+ self._agent_location = np.clip(
+ self._agent_location + direction, 0, self.size - 1
+ )
+ # An episode is done iff the agent has reached the target
+ terminated = np.array_equal(self._agent_location, self._target_location)
+ reward = 1 if terminated else 0 # Binary sparse rewards
+ observation = self._get_obs()
+ info = self._get_info()
+
+ if self.render_mode == "human":
+ self._render_frame()
+
+ return observation, reward, terminated, False, info
+```
+
+### Rendering
+Here, we are using PyGame for rendering. A similar approach to rendering is used in many environments that are included
+with Gymnasium and you can use it as a skeleton for your own environments:
+
+```python
+ def render(self):
+ if self.render_mode == "rgb_array":
+ return self._render_frame()
+
+ def _render_frame(self):
+ if self.window is None and self.render_mode == "human":
+ pygame.init()
+ pygame.display.init()
+ self.window = pygame.display.set_mode((self.window_size, self.window_size))
+ if self.clock is None and self.render_mode == "human":
+ self.clock = pygame.time.Clock()
+
+ canvas = pygame.Surface((self.window_size, self.window_size))
+ canvas.fill((255, 255, 255))
+ pix_square_size = (
+ self.window_size / self.size
+ ) # The size of a single grid square in pixels
+
+ # First we draw the target
+ pygame.draw.rect(
+ canvas,
+ (255, 0, 0),
+ pygame.Rect(
+ pix_square_size * self._target_location,
+ (pix_square_size, pix_square_size),
+ ),
+ )
+ # Now we draw the agent
+ pygame.draw.circle(
+ canvas,
+ (0, 0, 255),
+ (self._agent_location + 0.5) * pix_square_size,
+ pix_square_size / 3,
+ )
+
+ # Finally, add some gridlines
+ for x in range(self.size + 1):
+ pygame.draw.line(
+ canvas,
+ 0,
+ (0, pix_square_size * x),
+ (self.window_size, pix_square_size * x),
+ width=3,
+ )
+ pygame.draw.line(
+ canvas,
+ 0,
+ (pix_square_size * x, 0),
+ (pix_square_size * x, self.window_size),
+ width=3,
+ )
+
+ if self.render_mode == "human":
+ # The following line copies our drawings from `canvas` to the visible window
+ self.window.blit(canvas, canvas.get_rect())
+ pygame.event.pump()
+ pygame.display.update()
+
+ # We need to ensure that human-rendering occurs at the predefined framerate.
+ # The following line will automatically add a delay to keep the framerate stable.
+ self.clock.tick(self.metadata["render_fps"])
+ else: # rgb_array
+ return np.transpose(
+ np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
+ )
+```
+
+### Close
+The `close` method should close any open resources that were used by the environment. In many cases,
+you don't actually have to bother to implement this method. However, in our example `render_mode` may
+be `"human"` and we might need to close the window that has been opened:
+
+```python
+ def close(self):
+ if self.window is not None:
+ pygame.display.quit()
+ pygame.quit()
+```
+
+In other environments `close` might also close files that were opened
+or release other resources. You shouldn't interact with the environment after having called `close`.
+
+
+## Registering Envs
+
+In order for the custom environments to be detected by Gymnasium, they must be registered as follows. We will choose to put this code in `gym-examples/gym_examples/__init__.py`.
+
+```python
+from gymnasium.envs.registration import register
+
+register(
+ id='gym_examples/GridWorld-v0',
+ entry_point='gym_examples.envs:GridWorldEnv',
+ max_episode_steps=300,
+)
+```
+The environment ID consists of three components, two of which are optional: an optional namespace (here: `gym_examples`), a mandatory name (here: `GridWorld`) and an optional but recommended version (here: v0). It might have also been registered as `GridWorld-v0` (the recommended approach), `GridWorld` or `gym_examples/GridWorld`, and the appropriate ID should then be used during environment creation.
+
+The keyword argument `max_episode_steps=300` will ensure that GridWorld environments that are instantiated via `gymnasium.make`
+will be wrapped in a `TimeLimit` wrapper (see [the wrapper documentation](https://www.gymlibrary.dev/pages/wrappers/index)
+for more information). A done signal will then be produced if the agent has reached the target *or* 300 steps have been
+executed in the current episode. To distinguish truncation and termination, you can check `info["TimeLimit.truncated"]`.
+
+Apart from `id` and `entry_point`, you may pass the following additional keyword arguments to `register`:
+
+| Name | Type | Default | Description |
+|---------------------|----------|----------|-----------------------------------------------------------------------------------------------------------|
+| `reward_threshold` | `float` | `None` | The reward threshold before the task is considered solved |
+| `nondeterministic` | `bool` | `False` | Whether this environment is non-deterministic even after seeding |
+| `max_episode_steps` | `int` | `None` | The maximum number of steps that an episode can consist of. If not `None`, a `TimeLimit` wrapper is added |
+| `order_enforce` | `bool` | `True` | Whether to wrap the environment in an `OrderEnforcing` wrapper |
+| `autoreset` | `bool` | `False` | Whether to wrap the environment in an `AutoResetWrapper` |
+| `kwargs` | `dict` | `{}` | The default kwargs to pass to the environment class |
+
+Most of these keywords (except for `max_episode_steps`, `order_enforce` and `kwargs`) do not alter the behavior
+of environment instances but merely provide some extra information about your environment.
+After registration, our custom `GridWorldEnv` environment can be created with `env = gymnasium.make('gym_examples/GridWorld-v0')`.
+
+`gym-examples/gym_examples/envs/__init__.py` should have:
+
+```python
+from gym_examples.envs.grid_world import GridWorldEnv
+```
+
+If your environment is not registered, you may optionally pass a module to import, that would register your environment before creating it like this -
+`env = gymnasium.make('module:Env-v0')`, where `module` contains the registration code. For the GridWorld env, the registration code is run by importing `gym_examples`, so if it were not possible to import gym_examples explicitly, you could register it while making it via `env = gymnasium.make('gym_examples:gym_examples/GridWorld-v0')`. This is especially useful when you're allowed to pass only the environment ID into a third-party codebase (e.g. a learning library). This lets you register your environment without needing to edit the library's source code.
+
+## Creating a Package
+
+The last step is to structure our code as a Python package. This involves configuring `gym-examples/setup.py`. A minimal example of how to do so is as follows:
+
+```python
+from setuptools import setup
+
+setup(
+ name="gym_examples",
+ version="0.0.1",
+ install_requires=["gymnasium==0.26.0", "pygame==2.1.0"],
+)
+```
+
+## Creating Environment Instances
+After you have installed your package locally with `pip install -e gym-examples`, you can create an instance of the environment via:
+
+```python
+import gymnasium
+import gym_examples
+
+env = gymnasium.make('gym_examples/GridWorld-v0')
+```
+
+You can also pass keyword arguments of your environment's constructor to `gymnasium.make` to customize the environment.
+In our case, we could do:
+
+```python
+env = gymnasium.make('gym_examples/GridWorld-v0', size=10)
+```
+
+Sometimes, you may find it more convenient to skip registration and call the environment's
+constructor yourself. Some may find this approach more pythonic and environments that are instantiated like this are
+also perfectly fine (but remember to add wrappers as well!).
+
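+A sketch of what that might look like for our example, adding the `TimeLimit` wrapper by hand since `gymnasium.make` is no longer doing it for us:
+
+```python
+from gymnasium.wrappers import TimeLimit
+
+from gym_examples.envs import GridWorldEnv
+
+env = TimeLimit(GridWorldEnv(size=10), max_episode_steps=300)
+```
+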
+## Using Wrappers
+Oftentimes, we want to use different variants of a custom environment, or we want to
+modify the behavior of an environment that is provided by Gymnasium or some other party.
+Wrappers allow us to do this without changing the environment implementation or adding any boilerplate code.
+Check out the [wrapper documentation](https://www.gymlibrary.dev/content/wrappers/) for details on how to
+use wrappers and instructions for implementing your own.
+In our example, observations cannot be used directly in learning code because they are dictionaries.
+However, we don't actually need to touch our environment implementation to fix this! We can simply add
+a wrapper on top of environment instances to flatten observations into a single array:
+
+```python
+import gymnasium
+import gym_examples
+from gymnasium.wrappers import FlattenObservation
+
+env = gymnasium.make('gym_examples/GridWorld-v0')
+wrapped_env = FlattenObservation(env)
+print(wrapped_env.reset()) # E.g. [3 0 3 3], {}
+```
+
+Wrappers have the big advantage that they make environments highly modular. For instance, instead of flattening the
+observations from GridWorld, you might only want to look at the relative position of the target and the agent.
+In the section on [ObservationWrappers](https://www.gymlibrary.dev/content/wrappers/#observationwrapper) we have implemented
+a wrapper that does this job. This wrapper is also available in gym-examples:
+
+```python
+import gymnasium
+import gym_examples
+from gym_examples.wrappers import RelativePosition
+
+env = gymnasium.make('gym_examples/GridWorld-v0')
+wrapped_env = RelativePosition(env)
+print(wrapped_env.reset()) # E.g. [-3 3], {}
+```
+
diff --git a/docs/content/vectorising.md b/docs/content/vectorising.md
new file mode 100644
index 000000000..2814fc7d2
--- /dev/null
+++ b/docs/content/vectorising.md
@@ -0,0 +1,329 @@
+---
+layout: "contents"
+title: Vectorising your environments
+---
+
+# Vectorising your environments
+
+## Vectorized Environments
+*Vectorized environments* are environments that run multiple independent copies of the same environment, either sequentially or in parallel using [multiprocessing](https://docs.python.org/3/library/multiprocessing.html). Vectorized environments take as input a batch of actions and return a batch of observations. This is particularly useful, for example, when the policy is defined as a neural network that operates over a batch of observations.
+
+Gymnasium provides two types of vectorized environments:
+
+- `gymnasium.vector.SyncVectorEnv`, where the different copies of the environment are executed sequentially.
+- `gymnasium.vector.AsyncVectorEnv`, where the different copies of the environment are executed in parallel using [multiprocessing](https://docs.python.org/3/library/multiprocessing.html). This creates one process per copy.
+
+
+Similar to `gymnasium.make`, you can run a vectorized version of a registered environment using the `gymnasium.vector.make` function. This runs multiple copies of the same environment (in parallel, by default).
+
+The following example runs 3 copies of the ``CartPole-v1`` environment in parallel, taking as input a vector of 3 binary actions (one for each copy of the environment), and returning an array of 3 observations stacked along the first dimension, with an array of rewards returned by each copy, and an array of booleans indicating if the episode in each parallel environment has ended.
+
+```python
+>>> import gymnasium
+>>> import numpy as np
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.reset()
+>>> actions = np.array([1, 0, 1])
+>>> observations, rewards, dones, infos = envs.step(actions)
+
+>>> observations
+array([[ 0.00122802, 0.16228443, 0.02521779, -0.23700266],
+ [ 0.00788269, -0.17490888, 0.03393489, 0.31735462],
+ [ 0.04918966, 0.19421194, 0.02938497, -0.29495203]],
+ dtype=float32)
+>>> rewards
+array([1., 1., 1.])
+>>> dones
+array([False, False, False])
+>>> infos
+{}
+```
+
+The function `gymnasium.vector.make` is meant to be used only in basic cases (e.g. running multiple copies of the same registered environment). For any other use-cases, please use either the `SyncVectorEnv` for sequential execution, or `AsyncVectorEnv` for parallel execution. These use-cases may include:
+
+- Running multiple instances of the same environment with different parameters (e.g. ``"Pendulum-v0"`` with different values for gravity).
+- Running multiple instances of an unregistered environment (e.g. a custom environment).
+- Using a wrapper on some (but not all) environment copies (see the sketch below).
+
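+As a sketch of the last case (using the built-in `TimeLimit` wrapper purely for illustration), you can wrap some of the callables passed to the vectorized environment and leave the others plain:
+
+```python
+import gymnasium
+from gymnasium.wrappers import TimeLimit
+
+# Only the first copy gets the extra TimeLimit wrapper; the other two are plain.
+envs = gymnasium.vector.SyncVectorEnv([
+    lambda: TimeLimit(gymnasium.make("CartPole-v1"), max_episode_steps=100),
+    lambda: gymnasium.make("CartPole-v1"),
+    lambda: gymnasium.make("CartPole-v1"),
+])
+```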
+
+### Creating a vectorized environment
+To create a vectorized environment that runs multiple environment copies, you can wrap your parallel environments inside `gymnasium.vector.SyncVectorEnv` (for sequential execution), or `gymnasium.vector.AsyncVectorEnv` (for parallel execution, with [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). These vectorized environments take as input a list of callables specifying how the copies are created.
+
+```python
+>>> envs = gymnasium.vector.AsyncVectorEnv([
+... lambda: gymnasium.make("CartPole-v1"),
+... lambda: gymnasium.make("CartPole-v1"),
+... lambda: gymnasium.make("CartPole-v1")
+... ])
+```
+
+Alternatively, to create a vectorized environment of multiple copies of the same registered environment, you can use the function `gymnasium.vector.make()`.
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3) # Equivalent
+```
+
+To enable automatic batching of actions and observations, all of the environment copies must share the same `action_space` and `observation_space`. However, the parallel environments are not required to be exact copies of one another. For example, you can run 2 instances of ``Pendulum-v0`` with different values for gravity in a vectorized environment with:
+
+```python
+>>> env = gymnasium.vector.AsyncVectorEnv([
+... lambda: gymnasium.make("Pendulum-v0", g=9.81),
+... lambda: gymnasium.make("Pendulum-v0", g=1.62)
+... ])
+```
+
+See the `Observation & Action spaces` section for more information about automatic batching.
+
+When using `AsyncVectorEnv` with either the ``spawn`` or ``forkserver`` start methods, you must wrap your code containing the vectorized environment with ``if __name__ == "__main__":``. See [this documentation](https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods) for more information.
+
+```python
+if __name__ == "__main__":
+ envs = gymnasium.vector.make("CartPole-v1", num_envs=3, context="spawn")
+```
+### Working with vectorized environments
+While standard Gymnasium environments take a single action and return a single observation (with a reward, and boolean indicating termination), vectorized environments take a *batch of actions* as input, and return a *batch of observations*, together with an array of rewards and booleans indicating if the episode ended in each environment copy.
+
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.reset()
+(array([[-0.02792548, -0.04423395, 0.00026012, 0.04486719],
+ [-0.04906582, 0.02779809, 0.02881928, -0.04467649],
+ [ 0.0036706 , -0.00324916, 0.047668 , -0.02039891]],
+ dtype=float32), {})
+
+>>> actions = np.array([1, 0, 1])
+>>> observations, rewards, dones, infos = envs.step(actions)
+
+>>> observations
+array([[ 0.00187507, 0.18986781, -0.03168437, -0.301252 ],
+ [-0.02643229, -0.18816885, 0.04371385, 0.3034975 ],
+ [-0.02803041, 0.24251814, 0.02660446, -0.29707024]],
+ dtype=float32)
+>>> rewards
+array([1., 1., 1.])
+>>> dones
+array([False, False, False])
+>>> infos
+{}
+```
+
+Vectorized environments are compatible with any environment, regardless of the action and observation spaces (e.g. container spaces like `gymnasium.spaces.Dict`, or any arbitrarily nested spaces). In particular, vectorized environments can automatically batch the observations returned by `VectorEnv.reset` and `VectorEnv.step` for any standard Gymnasium `Space` (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, `gymnasium.spaces.Dict`, or any nested structure thereof). Similarly, vectorized environments can take batches of actions from any standard Gymnasium `Space`.
+
+```python
+>>> class DictEnv(gymnasium.Env):
+... observation_space = gymnasium.spaces.Dict({
+... "position": gymnasium.spaces.Box(-1., 1., (3,), np.float32),
+... "velocity": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+... })
+... action_space = gymnasium.spaces.Dict({
+... "fire": gymnasium.spaces.Discrete(2),
+... "jump": gymnasium.spaces.Discrete(2),
+... "acceleration": gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+... })
+...
+... def reset(self):
+... return self.observation_space.sample()
+...
+... def step(self, action):
+... observation = self.observation_space.sample()
+... return (observation, 0., False, {})
+
+>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: DictEnv()] * 3)
+>>> envs.observation_space
+Dict(position:Box(-1.0, 1.0, (3, 3), float32), velocity:Box(-1.0, 1.0, (3, 2), float32))
+>>> envs.action_space
+Dict(fire:MultiDiscrete([2 2 2]), jump:MultiDiscrete([2 2 2]), acceleration:Box(-1.0, 1.0, (3, 2), float32))
+
+>>> envs.reset()
+>>> actions = {
+... "fire": np.array([1, 1, 0]),
+... "jump": np.array([0, 1, 0]),
+... "acceleration": np.random.uniform(-1., 1., size=(3, 2))
+... }
+>>> observations, rewards, dones, infos = envs.step(actions)
+>>> observations
+{"position": array([[-0.5337036 , 0.7439302 , 0.41748118],
+ [ 0.9373266 , -0.5780453 , 0.8987405 ],
+ [-0.917269 , -0.5888639 , 0.812942 ]], dtype=float32),
+"velocity": array([[ 0.23626241, -0.0616814 ],
+ [-0.4057572 , -0.4875375 ],
+ [ 0.26341468, 0.72282314]], dtype=float32)}
+```
+
+The environment copies inside a vectorized environment automatically call `gymnasium.Env.reset` at the end of an episode. In the following example, the episode of the 3rd copy ends after 2 steps (the agent fell in a hole), and the parallel environment gets reset (observation ``0``).
+
+```python
+>>> envs = gymnasium.vector.make("FrozenLake-v1", num_envs=3, is_slippery=False)
+>>> envs.reset()
+(array([0, 0, 0]), {'prob': array([1, 1, 1]), '_prob': array([ True, True, True])})
+>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 2]))
+>>> observations, rewards, dones, infos = envs.step(np.array([1, 2, 1]))
+
+>>> dones
+array([False, False, True])
+>>> observations
+array([8, 2, 0])
+```
+
+Vectorized environments will return `infos` in the form of a dictionary where each value is an array of length `num_envs` and the _i-th_ value of the array represents the info of the _i-th_ environment.
+Each `key` of the info is paired with a boolean mask `_key` representing whether or not the _i-th_ environment has data.
+If the _dtype_ of the returned info is `int`, `float`, `bool` or any _dtype_ inherited from `np.number`, an array of the same _dtype_ will be returned. Otherwise, the array will have _dtype_ `object`.
+
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> observations, infos = envs.reset()
+
+>>> actions = np.array([1, 0, 1])
+>>> observations, rewards, dones, infos = envs.step(actions)
+
+>>> while not any(dones):
+... observations, rewards, dones, infos = envs.step(actions)
+
+>>> print(dones)
+[False, True, False]
+
+>>> print(infos)
+{'terminal_observation': array([None,
+ array([-0.11350546, -1.8090094 , 0.23710881, 2.8017728 ], dtype=float32),
+ None], dtype=object), '_terminal_observation': array([False, True, False])}
+```
+
+
+## Observation & Action spaces
+Like any Gymnasium environment, vectorized environments contain the two properties `VectorEnv.observation_space` and `VectorEnv.action_space` to specify the observation and action spaces of the environments. Since vectorized environments operate on multiple environment copies, where the actions taken and observations returned by all of the copies are batched together, the observation and action *spaces* are batched as well so that the input actions are valid elements of `VectorEnv.action_space`, and the observations are valid elements of `VectorEnv.observation_space`.
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.observation_space
+Box([[-4.8 ...]], [[4.8 ...]], (3, 4), float32)
+>>> envs.action_space
+MultiDiscrete([2 2 2])
+```
+
+In order to appropriately batch the observations and actions in vectorized environments, the observation and action spaces of all of the copies are required to be identical.
+
+```python
+>>> envs = gymnasium.vector.AsyncVectorEnv([
+... lambda: gymnasium.make("CartPole-v1"),
+... lambda: gymnasium.make("MountainCar-v0")
+... ])
+RuntimeError: Some environments have an observation space different from `Box([-4.8 ...], [4.8 ...], (4,), float32)`.
+In order to batch observations, the observation spaces from all environments must be equal.
+```
+However, sometimes it may be handy to have access to the observation and action spaces of a particular copy, and not the batched spaces. You can access those with the properties `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` of the vectorized environment.
+
+```python
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> envs.single_observation_space
+Box([-4.8 ...], [4.8 ...], (4,), float32)
+>>> envs.single_action_space
+Discrete(2)
+```
+This is convenient, for example, if you instantiate a policy. In the following example, we use `VectorEnv.single_observation_space` and `VectorEnv.single_action_space` to define the weights of a linear policy. Note that, thanks to the vectorized environment, we can apply the policy directly to the whole batch of observations with a single call to `policy`.
+
+```python
+>>> from gymnasium.spaces.utils import flatdim
+>>> from scipy.special import softmax
+
+>>> def policy(weights, observations):
+... logits = np.dot(observations, weights)
+... return softmax(logits, axis=1)
+
+>>> envs = gymnasium.vector.make("CartPole-v1", num_envs=3)
+>>> weights = np.random.randn(
+... flatdim(envs.single_observation_space),
+... envs.single_action_space.n
+... )
+>>> observations, infos = envs.reset()
+>>> actions = policy(weights, observations).argmax(axis=1)
+>>> observations, rewards, dones, infos = envs.step(actions)
+```
+
+## Intermediate Usage
+
+### Shared memory
+`AsyncVectorEnv` runs each environment copy inside an individual process. At each call to `AsyncVectorEnv.reset` or `AsyncVectorEnv.step`, the observations of all of the parallel environments are sent back to the main process. To avoid expensive transfers of data between processes, especially with large observations (e.g. images), `AsyncVectorEnv` uses shared memory by default (``shared_memory=True``) that processes can write to and read from at minimal cost. This can increase the throughput of the vectorized environment.
+
+```python
+>>> env_fns = [lambda: gymnasium.make("BreakoutNoFrameskip-v4")] * 5
+
+>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=False)
+>>> envs.reset()
+>>> %timeit envs.step(envs.action_space.sample())
+2.23 ms ± 136 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
+
+>>> envs = gymnasium.vector.AsyncVectorEnv(env_fns, shared_memory=True)
+>>> envs.reset()
+>>> %timeit envs.step(envs.action_space.sample())
+1.36 ms ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
+```
+
+### Exception handling
+Because sometimes things may not go as planned, the exceptions raised in any given environment copy are re-raised in the vectorized environment, even when the copies run in parallel with `AsyncVectorEnv`. This way, you can choose how to handle these exceptions yourself (with ``try ... except``).
+
+```python
+>>> class ErrorEnv(gymnasium.Env):
+... observation_space = gymnasium.spaces.Box(-1., 1., (2,), np.float32)
+... action_space = gymnasium.spaces.Discrete(2)
+...
+... def reset(self):
+... return np.zeros((2,), dtype=np.float32), {}
+...
+... def step(self, action):
+... if action == 1:
+... raise ValueError("An error occurred.")
+... observation = self.observation_space.sample()
+... return (observation, 0., False, {})
+
+>>> envs = gymnasium.vector.AsyncVectorEnv([lambda: ErrorEnv()] * 3)
+>>> observations, infos = envs.reset()
+>>> observations, rewards, dones, infos = envs.step(np.array([0, 0, 1]))
+ERROR: Received the following error from Worker-2: ValueError: An error occurred.
+ERROR: Shutting down Worker-2.
+ERROR: Raising the last exception back to the main process.
+ValueError: An error occurred.
+```
+
+## Advanced Usage
+
+### Custom spaces
+Vectorized environments will batch actions and observations if they are elements from standard Gymnasium spaces, such as `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, or `gymnasium.spaces.Dict`. However, if you create your own environment with a custom action and/or observation space (inheriting from `gymnasium.Space`), the vectorized environment will not attempt to automatically batch the actions/observations, and instead it will return the raw tuple of elements from all parallel environments.
+
+In the following example, we create a new environment `SMILESEnv`, whose observations are strings representing the [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) notation of a molecular structure, with a custom observation space `SMILES`. The observations returned by the vectorized environment are contained in a tuple of strings.
+
+```python
+>>> class SMILES(gymnasium.Space):
+... def __init__(self, symbols):
+... super().__init__()
+... self.symbols = symbols
+...
+... def __eq__(self, other):
+... return self.symbols == other.symbols
+
+>>> class SMILESEnv(gymnasium.Env):
+... observation_space = SMILES("][()CO=")
+... action_space = gymnasium.spaces.Discrete(7)
+...
+... def reset(self):
+... self._state = "["
+... return self._state
+...
+... def step(self, action):
+... self._state += self.observation_space.symbols[action]
+... reward = done = (action == 0)
+... return (self._state, float(reward), done, {})
+
+>>> envs = gymnasium.vector.AsyncVectorEnv(
+... [lambda: SMILESEnv()] * 3,
+... shared_memory=False
+... )
+>>> envs.reset()
+>>> observations, rewards, dones, infos = envs.step(np.array([2, 5, 4]))
+>>> observations
+('[(', '[O', '[C')
+```
+
+Custom observation and action spaces may inherit from the `gymnasium.Space` class. However, most use cases should be covered by the existing space classes (e.g. `gymnasium.spaces.Box`, `gymnasium.spaces.Discrete`, etc.), and container classes (`gymnasium.spaces.Tuple` and `gymnasium.spaces.Dict`). Moreover, some implementations of reinforcement learning algorithms might not handle custom spaces properly. Use custom spaces with care.
+
+If you use `AsyncVectorEnv` with a custom observation space, you must set ``shared_memory=False``, since shared memory and automatic batching are not compatible with custom spaces. In general, if you use custom spaces with `AsyncVectorEnv`, the elements of those spaces must be picklable.
+
diff --git a/docs/environments/atari/adventure.md b/docs/environments/atari/adventure.md
new file mode 100644
index 000000000..5e059bc0a
--- /dev/null
+++ b/docs/environments/atari/adventure.md
@@ -0,0 +1,78 @@
+---
+title: Adventure
+firstpage:
+---
+
+# Adventure
+
+```{figure} ../../_static/videos/atari/adventure.gif
+:width: 120px
+:name: adventure
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|-------------------|--------------------------------|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Adventure-v5")` |
+
+### Description
+You must find the enchanted chalice and return it to the golden castle. You can pick up various objects (keys, a sword,
+a bridge, or a magnet) and have to fight or outmanoeuvre dragons.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Adventure-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------|----------------------|----------------|
+| Adventure | `[0, 1, 2]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Adventure-v0")`.
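+
+For instance (a sketch; `mode`, `difficulty` and `obs_type` are keyword arguments of the underlying ALE environments and are assumed here rather than shown on this page), a specific flavor with RAM observations could be created as:
+
+```python
+import gymnasium
+
+# mode/difficulty select a flavor from the table above; obs_type="ram" switches
+# the observation to the 128 bytes of console RAM.
+env = gymnasium.make("ALE/Adventure-v5", mode=1, difficulty=0, obs_type="ram")
+```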
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
\ No newline at end of file
diff --git a/docs/environments/atari/air_raid.md b/docs/environments/atari/air_raid.md
new file mode 100644
index 000000000..d0331d4ab
--- /dev/null
+++ b/docs/environments/atari/air_raid.md
@@ -0,0 +1,86 @@
+---
+title: Air Raid
+---
+
+# Air Raid
+
+```{figure} ../../_static/videos/atari/air_raid.gif
+:width: 120px
+:name: air_raid
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/AirRaid-v5")` |
+
+### Description
+You control a ship that can move sideways. You must protect two buildings (one on the right and one on the left side of the screen) from
+flying saucers that are trying to drop bombs on them.
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | RIGHTFIRE |
+| 5 | LEFTFIRE |
+
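+To obtain this reduced action space on the v5 environment (a sketch; `full_action_space` is an ALE keyword argument assumed here), you can pass the flag explicitly:
+
+```python
+import gymnasium
+
+# With full_action_space=False the action space shrinks to the six actions above.
+env = gymnasium.make("ALE/AirRaid-v5", full_action_space=False)
+print(env.action_space)  # Discrete(6), per the table above
+```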
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/AirRaid-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| AirRaid | `[1, ..., 8]` | `[0]` | `1` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("AirRaid-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
\ No newline at end of file
diff --git a/docs/environments/atari/alien.md b/docs/environments/atari/alien.md
new file mode 100644
index 000000000..798ce543b
--- /dev/null
+++ b/docs/environments/atari/alien.md
@@ -0,0 +1,84 @@
+---
+title: Alien
+---
+
+# Alien
+
+```{figure} ../../_static/videos/atari/alien.gif
+:width: 120px
+:name: alien
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Alien-v5")` |
+
+### Description
+You are stuck in a maze-like space ship with three aliens. Your goal is to destroy their eggs that are scattered
+all over the ship while simultaneously avoiding the aliens (they are trying to kill you). You have a flamethrower that can help you turn
+them away in tricky situations. Moreover, you can occasionally collect a power-up (pulsar) that gives you the temporary ability to kill aliens.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by destroying eggs, killing aliens, using pulsars, and collecting special prizes. When you are caught
+by an alien, you will lose one of your lives. The number of lives you have depends on the game flavor. For a
+table of scores corresponding to the different achievements, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=815).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Alien-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Alien | `[0, ..., 3]` | `[0, ..., 3]` | `0` |
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Alien-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/amidar.md b/docs/environments/atari/amidar.md
new file mode 100644
index 000000000..1916d6c5d
--- /dev/null
+++ b/docs/environments/atari/amidar.md
@@ -0,0 +1,96 @@
+---
+title: Amidar
+---
+
+# Amidar
+
+```{figure} ../../_static/videos/atari/amidar.gif
+:width: 120px
+:name: amidar
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Amidar-v5")` |
+
+### Description
+This game is similar to Pac-Man: You are trying to visit all places on a 2-dimensional grid while simultaneously avoiding
+your enemies. You can turn the tables at one point in the game: Your enemies turn into chickens and you can
+catch them. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPFIRE |
+| 7 | RIGHTFIRE |
+| 8 | LEFTFIRE |
+| 9 | DOWNFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by traversing new parts of the grid. Coloring an entire box in the maze or catching chickens gives extra points.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=817).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Amidar-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Amidar | `[0]` | `[0, 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Amidar-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/assault.md b/docs/environments/atari/assault.md
new file mode 100644
index 000000000..3b7862fb1
--- /dev/null
+++ b/docs/environments/atari/assault.md
@@ -0,0 +1,84 @@
+---
+title: Assault
+---
+# Assault
+
+```{figure} ../../_static/videos/atari/assault.gif
+:width: 120px
+:name: assault
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Assault-v5")` |
+
+### Description
+You control a vehicle that can move sideways. A big mother ship circles overhead and continually deploys smaller drones.
+You must destroy these enemies and dodge their attacks. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=827).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | RIGHTFIRE |
+| 6 | LEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Assault-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Assault | `[0]` | `[0]` | `0` |
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Assault-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/asterix.md b/docs/environments/atari/asterix.md
new file mode 100644
index 000000000..5fcfd08d2
--- /dev/null
+++ b/docs/environments/atari/asterix.md
@@ -0,0 +1,93 @@
+---
+title: Asterix
+---
+# Asterix
+
+```{figure} ../../_static/videos/atari/asterix.gif
+:width: 120px
+:name: asterix
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Asterix-v5")` |
+
+### Description
+You are Asterix and can move horizontally (continuously) and vertically (discretely). Objects
+move horizontally across the screen: lyres and other (more useful) objects. Your goal is to guide
+Asterix in such a way as to avoid lyres and collect as many other objects as possible. You score points by collecting
+objects and lose a life whenever you collect a lyre. You have three lives available at the beginning. If you score sufficiently
+many points, you will be awarded an extra life.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+A table of scores awarded for collecting the different objects is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=3325).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Asterix-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Asterix | `[0]` | `[0]` | `0` |
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Asterix-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
\ No newline at end of file
diff --git a/docs/environments/atari/asteroids.md b/docs/environments/atari/asteroids.md
new file mode 100644
index 000000000..53300094d
--- /dev/null
+++ b/docs/environments/atari/asteroids.md
@@ -0,0 +1,101 @@
+---
+title: Asteroids
+---
+# Asteroids
+
+```{figure} ../../_static/videos/atari/asteroids.gif
+:width: 120px
+:name: asteroids
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Asteroids-v5")` |
+
+### Description
+This is a well-known arcade game: You control a spaceship in an asteroid field and must break up asteroids by shooting
+them. Once all asteroids are destroyed, you enter a new level and new asteroids will appear. You will occasionally
+be attacked by a flying saucer.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|-------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | UPFIRE |
+| 9 | RIGHTFIRE |
+| 10 | LEFTFIRE |
+| 11 | DOWNFIRE |
+| 12 | UPRIGHTFIRE |
+| 13 | UPLEFTFIRE |
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points for destroying asteroids, satellites and UFOs. The smaller the asteroid, the more points you score
+for destroying it.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=828&itemTypeID=HTMLMANUAL).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Asteroids-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Asteroids | `[0, ..., 31, 128]` | `[0, 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Asteroids-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/atlantis.md b/docs/environments/atari/atlantis.md
new file mode 100644
index 000000000..a83336b02
--- /dev/null
+++ b/docs/environments/atari/atlantis.md
@@ -0,0 +1,92 @@
+---
+title: Atlantis
+---
+
+# Atlantis
+
+```{figure} ../../_static/videos/atari/atlantis.gif
+:width: 120px
+:name: atlantis
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Atlantis-v5")` |
+
+### Description
+Your job is to defend the submerged city of Atlantis. Your enemies slowly descend towards the city and you must
+destroy them before they reach striking distance. To this end, you control three defense posts.
+You lose if your enemies manage to destroy all seven of Atlantis' installations. You may rebuild installations
+after you have fought off a wave of enemies and scored a sufficient number of points.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|-----------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHTFIRE |
+| 3 | LEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points for destroying enemies and for keeping installations protected during attack waves. You score more points
+if you manage to destroy your enemies with one of the outer defense posts.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=835).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Atlantis-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Atlantis | `[0, ..., 3]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Atlantis-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/bank_heist.md b/docs/environments/atari/bank_heist.md
new file mode 100644
index 000000000..d83f83ebd
--- /dev/null
+++ b/docs/environments/atari/bank_heist.md
@@ -0,0 +1,83 @@
+---
+title: Bank Heist
+---
+# Bank Heist
+
+```{figure} ../../_static/videos/atari/bank_heist.gif
+:width: 120px
+:name: Bank Heist
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/BankHeist-v5")` |
+
+### Description
+You are a bank robber and (naturally) want to rob as many banks as possible. You control your getaway car and must
+navigate maze-like cities. The police chases you and will appear whenever you rob a bank. You may destroy police cars
+by dropping sticks of dynamite. You can fill up your gas tank by entering a new city.
+At the beginning of the game you have four lives. Lives are lost if you run out of gas, are caught by the police,
+or run over the dynamite you have previously dropped.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points for robbing banks and destroying police cars. If you rob nine or more banks, and then leave the city,
+you will score extra points.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=1008).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/BankHeist-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| BankHeist | `[0, 4, 8, 12, 16, 20, 24, 28]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BankHeist-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/battle_zone.md b/docs/environments/atari/battle_zone.md
new file mode 100644
index 000000000..b4a23f88d
--- /dev/null
+++ b/docs/environments/atari/battle_zone.md
@@ -0,0 +1,83 @@
+---
+title: Battle Zone
+---
+
+# Battle Zone
+
+```{figure} ../../_static/videos/atari/battle_zone.gif
+:width: 120px
+:name: BattleZone
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/BattleZone-v5")` |
+
+### Description
+You control a tank and must destroy enemy vehicles. This game is played in a first-person perspective and creates
+a 3D illusion. A radar screen shows enemies around you. You start with 5 lives and gain up to 2 extra lives if you reach
+a sufficient score.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL)
+
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You receive points for destroying enemies.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=859&itemTypeID=HTMLMANUAL).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/BattleZone-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| BattleZone | `[1, 2, 3]` | `[0]` | `1` |
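+
+For example, a specific flavor can be requested at creation time (a sketch; `mode` and `difficulty` take values from the table above):
+
+```
+import gymnasium
+
+# Game mode 2 at difficulty 0, i.e. one of the flavors listed in the table above.
+env = gymnasium.make("ALE/BattleZone-v5", mode=2, difficulty=0)
+```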
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BattleZone-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/beam_rider.md b/docs/environments/atari/beam_rider.md
new file mode 100644
index 000000000..10734f65e
--- /dev/null
+++ b/docs/environments/atari/beam_rider.md
@@ -0,0 +1,102 @@
+---
+layout: env
+title: Beam Rider
+grid:
+ - Action Space: Discrete(18)
+ - Observation Shape: (210, 160, 3)
+ - Observation High: 255
+ - Observation Low: 0
+ - Import: gymnasium.make("ALE/BeamRider-v5")
+---
+# Beam Rider
+
+```{figure} ../../_static/videos/atari/beam_rider.gif
+:width: 120px
+:name: BeamRider
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/BeamRider-v5")` |
+
+### Description
+You control a space-ship that travels forward at a constant speed. You can only steer it sideways between discrete
+positions. Your goal is to destroy enemy ships, avoid their attacks and dodge space debris.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|-------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | RIGHTFIRE |
+| 8   | LEFTFIRE    |
+
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
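+
+As a sketch (the `obs_type` keyword name is an assumption here; the general Atari article documents the supported observation options), the alternative observation types might be requested like this:
+
+```
+import gymnasium
+
+env_rgb = gymnasium.make("ALE/BeamRider-v5")                         # RGB image (default)
+env_ram = gymnasium.make("ALE/BeamRider-v5", obs_type="ram")         # 128 bytes of RAM
+env_gray = gymnasium.make("ALE/BeamRider-v5", obs_type="grayscale")  # grayscale image
+```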
+
+### Rewards
+You score points for destroying enemies.
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_thumbs.php?SystemID=2600&SoftwareID=860&itemTypeID=MANUAL).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/BeamRider-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| BeamRider | `[0]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("BeamRider-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/berzerk.md b/docs/environments/atari/berzerk.md
new file mode 100644
index 000000000..85e28100d
--- /dev/null
+++ b/docs/environments/atari/berzerk.md
@@ -0,0 +1,80 @@
+---
+title: Berzerk
+---
+# Berzerk
+
+```{figure} ../../_static/videos/atari/berzerk.gif
+:width: 120px
+:name: Berzerk
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Berzerk-v5")` |
+
+### Description
+You are stuck in a maze with evil robots. You must destroy them and avoid touching the walls of the maze, as this will kill you. You may be awarded extra lives after scoring a sufficient number of points, depending on the game mode.
+You may also be chased by an undefeatable enemy, Evil Otto, that you must avoid. Evil Otto does not appear in the default mode.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points for destroying robots.
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=866&itemTypeID=HTMLMANUAL).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Berzerk-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Berzerk | `[1, ..., 9, 16, 17, 18]` | `[0]` | `1` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Berzerk-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/bowling.md b/docs/environments/atari/bowling.md
new file mode 100644
index 000000000..07103a9aa
--- /dev/null
+++ b/docs/environments/atari/bowling.md
@@ -0,0 +1,94 @@
+---
+title: Bowling
+---
+# Bowling
+
+```{figure} ../../_static/videos/atari/bowling.gif
+:width: 120px
+:name: Bowling
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Bowling-v5")` |
+
+### Description
+Your goal is to score as many points as possible in the game of Bowling. A game consists of 10 frames and you have two
+tries per frame. Knocking down all pins on the first try is called a "strike". Knocking down the remaining pins on the second roll
+is called a "spare". Otherwise, the frame is called "open".
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|-------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | DOWN |
+| 4 | UPFIRE |
+| 5 | DOWNFIRE |
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You receive points for knocking down pins. The exact score depends on whether you manage a "strike", "spare" or "open"
+frame. Moreover, the points you score for one frame may depend on following frames.
+You can score up to 300 points in one game (if you manage to do 12 strikes).
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=879).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Bowling-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Bowling | `[0, 2, 4]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Bowling-v0")`.
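+
+For illustration (assuming the legacy v0/v4 IDs are registered by the installed ALE plugin), the un-namespaced and RAM-observation variants would be created as:
+
+```
+import gymnasium
+
+env_v0 = gymnasium.make("Bowling-v0")        # legacy ID outside the "ALE" namespace
+env_ram = gymnasium.make("Bowling-ram-v0")   # RAM observations via the "-ram" suffix
+```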
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/boxing.md b/docs/environments/atari/boxing.md
new file mode 100644
index 000000000..046afca12
--- /dev/null
+++ b/docs/environments/atari/boxing.md
@@ -0,0 +1,79 @@
+---
+title: Boxing
+---
+# Boxing
+
+```{figure} ../../_static/videos/atari/boxing.gif
+:width: 120px
+:name: Boxing
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Boxing-v5")` |
+
+### Description
+You fight an opponent in a boxing ring. You score points for hitting the opponent. If you score 100 points, your opponent is
+knocked out.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by landing punches.
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=882).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Boxing-v5")
+```
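+
+A minimal random-agent loop using the standard Gymnasium API (shown only to illustrate the interaction pattern; every environment on these pages is used the same way):
+
+```
+import gymnasium
+
+env = gymnasium.make("ALE/Boxing-v5")
+observation, info = env.reset(seed=42)
+for _ in range(1000):
+    action = env.action_space.sample()  # replace with your agent's policy
+    observation, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        observation, info = env.reset()
+env.close()
+```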
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Boxing | `[0]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Boxing-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/breakout.md b/docs/environments/atari/breakout.md
new file mode 100644
index 000000000..a66ba2225
--- /dev/null
+++ b/docs/environments/atari/breakout.md
@@ -0,0 +1,89 @@
+---
+title: Breakout
+---
+# Breakout
+
+```{figure} ../../_static/videos/atari/breakout.gif
+:width: 120px
+:name: Breakout
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Breakout-v5")` |
+
+### Description
+Another famous Atari game. The dynamics are similar to Pong: you move a paddle and hit the ball into a brick wall at the
+top of the screen. Your goal is to destroy the brick wall. You can try to break through the wall and let the ball
+wreak havoc on the other side, all on its own! You have five lives.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
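+
+For instance, assuming the v5 constructor accepts `full_action_space` as described in the general Atari article, the reduced action set above can be requested explicitly:
+
+```
+import gymnasium
+
+# With the reduced action set, only the four actions from the table above remain.
+env = gymnasium.make("ALE/Breakout-v5", full_action_space=False)
+print(env.action_space)  # expected: Discrete(4)
+```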
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by destroying bricks in the wall. The reward for destroying a brick depends on the color of the brick.
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=889).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Breakout-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Breakout | `[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Breakout-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/carnival.md b/docs/environments/atari/carnival.md
new file mode 100644
index 000000000..4aed37d9b
--- /dev/null
+++ b/docs/environments/atari/carnival.md
@@ -0,0 +1,93 @@
+---
+title: Carnival
+---
+# Carnival
+
+```{figure} ../../_static/videos/atari/carnival.gif
+:width: 120px
+:name: Carnival
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (214, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Carnival-v5")` |
+
+### Description
+This is a "shoot 'em up" game. Targets move horizontally across the screen and you must shoot them. You are
+in control of a gun that can be moved horizontally. The supply of ammunition is limited and chickens may steal some bullets
+from you if you don't hit them in time.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+
+| Num | Action |
+|-----|-----------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | RIGHTFIRE |
+| 5 | LEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by destroying targets. Points (or bullets) may be subtracted if you hit the target when it shows a minus sign.
+You will score extra points if it shows a plus sign!
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=908).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Carnival-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Carnival | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Carnival-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/centipede.md b/docs/environments/atari/centipede.md
new file mode 100644
index 000000000..0b6af67cc
--- /dev/null
+++ b/docs/environments/atari/centipede.md
@@ -0,0 +1,84 @@
+---
+title: Centipede
+---
+# Centipede
+
+```{figure} ../../_static/videos/atari/centipede.gif
+:width: 120px
+:name: Centipede
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Centipede-v5")` |
+
+### Description
+You are an elf and must use your magic wands to fend off spiders, fleas and centipedes. Your goal is to protect mushrooms in
+an enchanted forest. If you are bitten by a spider, flea or centipede, you will be temporarily paralyzed and you will
+lose a magic wand. The game ends once you have lost all wands. You may receive additional wands after scoring
+a sufficient number of points.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points by hitting centipedes, scorpions, fleas and spiders. Additional points are awarded after every round
+(i.e. after you have lost a wand) for mushrooms that were not destroyed.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=911).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Centipede-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Centipede | `[22, 86]` | `[0]` | `22` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Centipede-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/chopper_command.md b/docs/environments/atari/chopper_command.md
new file mode 100644
index 000000000..1ffea7b1c
--- /dev/null
+++ b/docs/environments/atari/chopper_command.md
@@ -0,0 +1,81 @@
+---
+title: Chopper Command
+---
+# Chopper Command
+
+```{figure} ../../_static/videos/atari/chopper_command.gif
+:width: 120px
+:name: ChopperCommand
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/ChopperCommand-v5")` |
+
+### Description
+You control a helicopter and must protect truck convoys. To that end, you need to shoot down enemy aircraft.
+A mini-map is displayed at the bottom of the screen.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points by destroying planes and other helicopters. You score extra points at the end of every wave, depending on the number
+of trucks that have survived.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareID=921).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/ChopperCommand-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| ChopperCommand | `[0, 2]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("ChopperCommand-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/complete_list.html b/docs/environments/atari/complete_list.html
new file mode 100644
index 000000000..14ea1a2b1
--- /dev/null
+++ b/docs/environments/atari/complete_list.html
@@ -0,0 +1,749 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/atari/complete_list.md b/docs/environments/atari/complete_list.md
new file mode 100644
index 000000000..a756087b7
--- /dev/null
+++ b/docs/environments/atari/complete_list.md
@@ -0,0 +1,4 @@
+# Complete List - Atari
+```{raw} html
+:file: complete_list.html
+```
\ No newline at end of file
diff --git a/docs/environments/atari/crazy_climber.md b/docs/environments/atari/crazy_climber.md
new file mode 100644
index 000000000..f4c95ec22
--- /dev/null
+++ b/docs/environments/atari/crazy_climber.md
@@ -0,0 +1,96 @@
+---
+title: Crazy Climber
+---
+# Crazy Climber
+
+```{figure} ../../_static/videos/atari/crazy_climber.gif
+:width: 120px
+:name: CrazyClimber
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/CrazyClimber-v5")` |
+
+### Description
+You are a climber trying to reach the top of four buildings, while avoiding obstacles like closing
+windows and falling objects. When you take damage (from closing windows or falling objects) you will fall and
+lose one life; you have a total of 5 lives before the game ends. At the top of each building, there's
+a helicopter which you need to catch to get to the next building. The goal is to climb as fast as
+possible while receiving the least amount of damage.
+
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+A table of scores awarded for completing each row of a building is provided on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=113).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/CrazyClimber-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| CrazyClimber | `[0, ..., 3]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("CrazyClimber-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/defender.md b/docs/environments/atari/defender.md
new file mode 100644
index 000000000..3dd1fb7ee
--- /dev/null
+++ b/docs/environments/atari/defender.md
@@ -0,0 +1,84 @@
+---
+title: Defender
+---
+
+# Defender
+
+```{figure} ../../_static/videos/atari/defender.gif
+:width: 120px
+:name: Defender
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|-------------------|-------------------------------|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Defender-v5")` |
+
+### Description
+Aliens attack the Earth. You control a spaceship and must defend humanity by destroying alien ships and rescuing humanoids.
+You have three lives and three smart bombs. You lose a life when you are shot down by an alien spaceship.
+Points are scored by destroying enemies and retrieving humans that are being abducted. You have an unlimited number of
+laser missiles.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128)
+
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You receive points for destroying enemies, rescuing abducted humans and keeping humans alive.
+For more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=128).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Defender-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------|--------------------|---------------|
+| Defender | `[1, ..., 9, 16]` | `[0, 1]` | `1` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Defender-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/demon_attack.md b/docs/environments/atari/demon_attack.md
new file mode 100644
index 000000000..f74e8b940
--- /dev/null
+++ b/docs/environments/atari/demon_attack.md
@@ -0,0 +1,97 @@
+---
+title: Demon Attack
+---
+# Demon Attack
+
+```{figure} ../../_static/videos/atari/demon_attack.gif
+:width: 120px
+:name: DemonAttack
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/DemonAttack-v5")` |
+
+### Description
+You are facing waves of demons on the ice planet of Krybor. Points are accumulated by destroying
+demons. You begin with 3 reserve bunkers, and can increase their number (up to 6) by avoiding enemy
+attacks. Each attack wave you survive without being hit grants you a new bunker. Every time an enemy
+hits you, a bunker is destroyed. When the last bunker falls, the next enemy hit will destroy you and
+the game ends.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Each enemy you slay gives you points. The amount of points depends on the type of demon and which
+wave you are in. A detailed table of scores is provided on [the AtariAge
+page](https://atariage.com/manual_html_page.php?SoftwareLabelID=135).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/DemonAttack-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| DemonAttack | `[1, 3, 5, 7]` | `[0, 1]` | `1` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("DemonAttack-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/double_dunk.md b/docs/environments/atari/double_dunk.md
new file mode 100644
index 000000000..bea4a03d4
--- /dev/null
+++ b/docs/environments/atari/double_dunk.md
@@ -0,0 +1,95 @@
+---
+title: Double Dunk
+---
+# Double Dunk
+
+```{figure} ../../_static/videos/atari/double_dunk.gif
+:width: 120px
+:name: DoubleDunk
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/DoubleDunk-v5")` |
+
+### Description
+You are playing a 2v2 game of basketball. At the start of each possession, you select between a set
+of different plays and then execute them to either score or prevent your rivals from scoring. The
+game lasts a set amount of time or until one of the teams reaches a certain score.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=153).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Scores follow the rules of basketball. You can get either 3 or 2 points depending on where you
+shoot from. After a defensive foul, a successful shot from the foul line gives you 1
+point.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/DoubleDunk-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| DoubleDunk | `[0, ..., 15]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("DoubleDunk-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/elevator_action.md b/docs/environments/atari/elevator_action.md
new file mode 100644
index 000000000..6d693e12e
--- /dev/null
+++ b/docs/environments/atari/elevator_action.md
@@ -0,0 +1,98 @@
+---
+title: Elevator Action
+---
+# Elevator Action
+
+```{figure} ../../_static/videos/atari/elevator_action.gif
+:width: 120px
+:name: ElevatorAction
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/ElevatorAction-v5")` |
+
+### Description
+You are a secret agent that must retrieve some secret documents and reach the ground level of a
+building by going down an elevator/stairs. Once you reach the ground level, you are picked up and
+taken to the next level. You are equipped with a gun to defend yourself against enemy agents waiting
+for you on each floor. You gather points by shooting down enemy agents and visiting apartments
+marked with a red door, which contain the secret documents.
+
+This is an unreleased prototype based on the arcade game. Limited documentation can be found on [the AtariAge
+page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=1131).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You start with 4 lives and are awarded 100 points for each enemy shot, and 500 points for each
+secret document collected (visiting a red door). Each time you get shot you lose one life, and the
+game ends when all lives are lost.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/ElevatorAction-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| ElevatorAction | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("ElevatorAction-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/enduro.md b/docs/environments/atari/enduro.md
new file mode 100644
index 000000000..5faf4a7f8
--- /dev/null
+++ b/docs/environments/atari/enduro.md
@@ -0,0 +1,93 @@
+---
+title: Enduro
+---
+# Enduro
+
+```{figure} ../../_static/videos/atari/enduro.gif
+:width: 120px
+:name: Enduro
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (250, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Enduro-v5")` |
+
+### Description
+You are a racer in the National Enduro, a long-distance endurance race. You must overtake a certain
+number of cars each day to stay in the race. On the first day you need to pass 200 cars, and 300 on
+each following day. The game ends if you do not meet your overtake quota for the day.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=163).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (250, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You get 1 point for each vehicle you overtake.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Enduro-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Enduro | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "Noframeskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Enduro-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/fishing_derby.md b/docs/environments/atari/fishing_derby.md
new file mode 100644
index 000000000..3ce8aab8c
--- /dev/null
+++ b/docs/environments/atari/fishing_derby.md
@@ -0,0 +1,109 @@
+---
+title: FishingDerby
+---
+# FishingDerby
+
+```{figure} ../../_static/videos/atari/fishing_derby.gif
+:width: 120px
+:name: FishingDerby
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/FishingDerby-v5")` |
+
+### Description
+Your objective is to catch more sunfish than your opponent. But it's not just between you and the other fisherman, as a big, black shark is lurking just below the surface, waiting to steal your catch! Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=182).
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=182).
+
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+### Action Space
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`, as shown in the sketch below.
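+
+As a minimal sketch (v5 defaults to the full action space, per the version table below):
+
+```
+import gymnasium
+
+# Reduced, game-specific action set vs. the full 18-action legal set.
+reduced = gymnasium.make("ALE/FishingDerby-v5", full_action_space=False)
+full = gymnasium.make("ALE/FishingDerby-v5")  # full_action_space=True is the v5 default
+print(reduced.action_space)  # a smaller Discrete space
+print(full.action_space)     # Discrete(18)
+```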
+
+The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavors.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/FishingDerby-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| FishingDerby | `[0]` | `[0, ..., 3]` | `0` |
+
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("FishingDerby-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
diff --git a/docs/environments/atari/freeway.md b/docs/environments/atari/freeway.md
new file mode 100644
index 000000000..7fd31e326
--- /dev/null
+++ b/docs/environments/atari/freeway.md
@@ -0,0 +1,109 @@
+---
+title: Freeway
+---
+# Freeway
+
+```{figure} ../../_static/videos/atari/freeway.gif
+:width: 120px
+:name: Freeway
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Freeway-v5")` |
+
+### Description
+Your objective is to guide your chicken across lane after lane of busy rush hour traffic. You receive a point for every chicken that makes it to the top of the screen after crossing all the lanes of traffic. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=192).
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/manual_thumbs.php?SoftwareLabelID=192).
+
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+### Action Space
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`.
+
+The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavors.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Freeway-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Freeway | `[0, ..., 7]` | `[0, 1]` | `0` |
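+
+For example, a minimal sketch of selecting a non-default flavor, using values from the table above:
+
+```
+import gymnasium
+
+# Mode 1 at difficulty 1 (both valid per the table above; values are illustrative).
+env = gymnasium.make("ALE/Freeway-v5", mode=1, difficulty=1)
+```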
+
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Freeway-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
diff --git a/docs/environments/atari/frostbite.md b/docs/environments/atari/frostbite.md
new file mode 100644
index 000000000..7734fd543
--- /dev/null
+++ b/docs/environments/atari/frostbite.md
@@ -0,0 +1,109 @@
+---
+title: Frostbite
+---
+# Frostbite
+
+```{figure} ../../_static/videos/atari/frostbite.gif
+:width: 120px
+:name: Frostbite
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Frostbite-v5")` |
+
+### Description
+In Frostbite, the player controls "Frostbite Bailey", who hops back and forth across an Arctic river, changing the color of the ice blocks from white to blue. Each time he does so, a block is added to his igloo. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=199).
+
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+### Action Space
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`.
+
+The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavors.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Frostbite-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Frostbite | `[0, 2]` | `[0]` | `0` |
+
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Frostbite-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
diff --git a/docs/environments/atari/gopher.md b/docs/environments/atari/gopher.md
new file mode 100644
index 000000000..a1bf80cd7
--- /dev/null
+++ b/docs/environments/atari/gopher.md
@@ -0,0 +1,109 @@
+---
+title: Gopher
+---
+# Gopher
+
+```{figure} ../../_static/videos/atari/gopher.gif
+:width: 120px
+:name: Gopher
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Gopher-v5")` |
+
+### Description
+The player controls a shovel-wielding farmer who protects a crop of three carrots from a gopher. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=218).
+
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+### Action Space
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`.
+
+The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavors.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Gopher-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Gopher | `[0, 2]` | `[0, 1]` | `0` |
+
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Gopher-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
diff --git a/docs/environments/atari/gravitar.md b/docs/environments/atari/gravitar.md
new file mode 100644
index 000000000..8b5039ba5
--- /dev/null
+++ b/docs/environments/atari/gravitar.md
@@ -0,0 +1,109 @@
+---
+title: Gravitar
+---
+# Gravitar
+
+```{figure} ../../_static/videos/atari/gravitar.gif
+:width: 120px
+:name: Gravitar
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Gravitar-v5")` |
+
+### Description
+The player controls a small blue spacecraft. The game starts in a fictional solar system with several planets to explore. If the player moves his ship into a planet, he will be taken to a side-view landscape where he has to destroy red bunkers. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/manual_html_page.php?SoftwareLabelID=223).
+
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+### Action Space
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`.
+
+The reduced action space of an Atari environment may depend on the flavor of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavors.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Gravitar-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Gravitar | `[0, ..., 4]` | `[0]` | `0` |
+
+
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Gravitar-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
diff --git a/docs/environments/atari/hero.md b/docs/environments/atari/hero.md
new file mode 100644
index 000000000..c0832bcbc
--- /dev/null
+++ b/docs/environments/atari/hero.md
@@ -0,0 +1,85 @@
+---
+title: Hero
+---
+
+# Hero
+
+```{figure} ../../_static/videos/atari/hero.gif
+:width: 120px
+:name: Hero
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|-------------------|---------------------------|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Hero-v5")` |
+
+### Description
+You need to rescue miners that are stuck in a mine shaft. You have access to various tools: A propeller backpack that
+allows you to fly wherever you want, sticks of dynamite that can be used to blast through walls, a laser beam to kill
+vermin, and a raft to float across stretches of lava.
+You have a limited amount of power. Once you run out, you lose a life.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).
+
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points for shooting critters, rescuing miners, and dynamiting walls.
+Extra points are awarded for any power remaining after rescuing a miner.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=228).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Hero-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|--------------------------|--------------------|---------------|
+| Hero | `[0, ..., 4]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Hero-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/ice_hockey.md b/docs/environments/atari/ice_hockey.md
new file mode 100644
index 000000000..cb488b344
--- /dev/null
+++ b/docs/environments/atari/ice_hockey.md
@@ -0,0 +1,82 @@
+---
+title: IceHockey
+---
+# IceHockey
+
+```{figure} ../../_static/videos/atari/ice_hockey.gif
+:width: 120px
+:name: IceHockey
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/IceHockey-v5")` |
+
+### Description
+Your goal is to score as many points as possible in a standard game of Ice Hockey over a 3-minute time period. The ball is usually called "the puck".
+There are 32 shot angles ranging from the extreme left to the extreme right. The angles can only aim towards the opponent's goal.
+Just as in real hockey, you can pass the puck by shooting it off the sides of the rink. This can be really key when you're in position to score a goal.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. [The general article on Atari environments](https://brosa.ca/blog/ale-release-v0.7) outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You score points by shooting the puck into your opponent's goal. Your opponent scores in the same manner.
+There are no limits to how many points you can get per game, other than the time limit of 3-minute games.
+For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=241).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/IceHockey-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| IceHockey | `[0, 2]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("IceHockey-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/index.html b/docs/environments/atari/index.html
new file mode 100644
index 000000000..70b72988b
--- /dev/null
+++ b/docs/environments/atari/index.html
@@ -0,0 +1,113 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/atari/index.md b/docs/environments/atari/index.md
new file mode 100644
index 000000000..990b59925
--- /dev/null
+++ b/docs/environments/atari/index.md
@@ -0,0 +1,288 @@
+---
+firstpage:
+lastpage:
+---
+
+# Atari
+
+A set of Atari 2600 environments simulated through Stella and the Arcade Learning Environment.
+
+```{toctree}
+:hidden:
+adventure
+air_raid
+alien
+amidar
+assault
+asterix
+asteroids
+atlantis
+bank_heist
+battle_zone
+beam_rider
+berzerk
+bowling
+boxing
+breakout
+carnival
+centipede
+chopper_command
+crazy_climber
+defender
+demon_attack
+double_dunk
+elevator_action
+enduro
+fishing_derby
+freeway
+frostbite
+gopher
+gravitar
+hero
+ice_hockey
+jamesbond
+journey_escape
+kangaroo
+krull
+kung_fu_master
+montezuma_revenge
+ms_pacman
+name_this_game
+phoenix
+pitfall
+pong
+pooyan
+private_eye
+qbert
+riverraid
+road_runner
+robotank
+seaquest
+skiing
+solaris
+space_invaders
+star_gunner
+tennis
+time_pilot
+tutankham
+up_n_down
+venture
+video_pinball
+wizard_of_wor
+yars_revenge
+zaxxon
+```
+
+```{raw} html
+ :file: index.html
+```
+
+Atari environments are simulated via the Arcade Learning Environment (ALE) [[1]](#1).
+
+### Action Space
+
+The action space is a subset of the following discrete set of legal actions:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 10 | UPFIRE |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+If you use v0 or v4 and the environment is initialized via `make`, the action space will usually be much smaller since most legal actions don't have
+any effect. Thus, the enumeration of the actions will differ. The action space can be expanded to the full
+legal space by passing the keyword argument `full_action_space=True` to `make`.
+
+The reduced action space of an Atari environment may depend on the "flavor" of the game. You can specify the flavor by providing
+the arguments `difficulty` and `mode` when constructing the environment. This documentation only provides details on the
+action spaces of default flavor choices.
+
+### Observation Space
+The observation issued by an Atari environment may be:
+- the RGB image that is displayed to a human player,
+- a grayscale version of that image or
+- the state of the 128 Bytes of RAM of the console.
+
+### Rewards
+The exact reward dynamics depend on the environment and are usually documented in the game's manual. You can
+find these manuals on [AtariAge](https://atariage.com/).
+
+### Stochasticity
+It was pointed out in [[1]](#1) that Atari games are entirely deterministic. Thus, agents could achieve
+state of the art performance by simply memorizing an optimal sequence of actions while completely ignoring observations from the environment.
+To avoid this, ALE implements sticky actions: Instead of always simulating the action passed to the environment, there is a small
+probability that the previously executed action is used instead.
+
+On top of this, Gymnasium implements stochastic frame skipping: In each environment step, the action is repeated for a random
+number of frames. This behavior may be altered by setting the keyword argument `frameskip` to either a positive integer or
+a tuple of two positive integers. If `frameskip` is an integer, frame skipping is deterministic, and in each step the action is
+repeated `frameskip` many times. Otherwise, if `frameskip` is a tuple, the number of skipped frames is chosen uniformly at
+random between `frameskip[0]` (inclusive) and `frameskip[1]` (exclusive) in each environment step.
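+
+As a minimal sketch (environment id and values are illustrative):
+
+```
+import gymnasium
+
+# Deterministic frame skipping: every action is repeated for exactly 4 frames.
+env_fixed = gymnasium.make("ALE/Breakout-v5", frameskip=4)
+
+# Stochastic frame skipping (2-4 frames per step) combined with sticky actions:
+# with probability 0.25 the previously executed action is repeated instead.
+env_stochastic = gymnasium.make(
+    "ALE/Breakout-v5", frameskip=(2, 5), repeat_action_probability=0.25
+)
+```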
+
+
+### Common Arguments
+When initializing Atari environments via `gymnasium.make`, you may pass some additional arguments. These work for any
+Atari environment. However, legal values for `mode` and `difficulty` depend on the environment.
+
+
+- **mode**: `int`. Game mode, see [[2]](#2). Legal values depend on the environment and are listed in the Flavors table below.
+
+- **difficulty**: `int`. Difficulty of the game, see [[2]](#2). Legal values depend on the environment and are listed in
+the Flavors table below. Together with `mode`, this determines the "flavor" of the game.
+
+- **obs_type**: `str`. This argument determines what observations are returned by the environment. Its values are:
+ - ram: The 128 Bytes of RAM are returned
+ - rgb: An RGB rendering of the game is returned
+ - grayscale: A grayscale rendering is returned
+
+- **frameskip**: `int` or a tuple of two `int`s. This argument controls stochastic frame skipping, as described in the section on stochasticity.
+
+- **repeat_action_probability**: `float`. The probability that an action sticks, as described in the section on stochasticity.
+
+- **full_action_space**: `bool`. If set to `True`, the action space consists of all legal actions on the console. Otherwise, the
+action space will be reduced to a subset.
+
+- **render_mode**: `str`. Specifies the rendering mode. Its values are:
+ - human: We'll interactively display the screen and enable game sounds. This will lock emulation to the ROM's specified FPS.
+ - rgb_array: We'll return the `rgb` key in step metadata with the current environment RGB frame.
+> It is highly recommended to specify `render_mode` during construction instead of calling `env.render()`.
+> This will guarantee proper scaling, audio support, and proper framerates.
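+
+Putting several of these together, a minimal sketch (argument values are illustrative and must be valid for the chosen game):
+
+```
+import gymnasium
+
+env = gymnasium.make(
+    "ALE/Alien-v5",
+    mode=2,                         # game mode (see the Flavors table below)
+    difficulty=0,                   # game difficulty
+    obs_type="grayscale",           # "ram" | "rgb" | "grayscale"
+    frameskip=4,                    # deterministic frame skipping
+    repeat_action_probability=0.0,  # disable sticky actions
+    full_action_space=False,        # reduced, game-specific action set
+    render_mode="rgb_array",        # or "human" for an interactive window
+)
+```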
+
+
+### Version History and Naming Schemes
+All Atari games are available in three versions. They differ in the default settings of the arguments above.
+The differences are listed in the following table:
+
+|Version|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ----- | --------- | ------------------------- | ---------|
+|v0 |`(2, 5,)` |`0.25` |`False` |
+|v4 |`(2, 5,)` |`0.0` |`False` |
+|v5 |`5` |`0.25` |`True` |
+
+> Version v5 follows the best practices outlined in [[2]](#2). Thus, it is recommended to transition to v5 and
+> customize the environment using the arguments above, if necessary.
+
+For each Atari game, several different configurations are registered in Gymnasium. The naming schemes are analogous for
+v0 and v4. Let us take a look at all variations of Amidar-v0 that are registered with Gymnasium:
+
+|Name |`obs_type=`|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
+|Amidar-v0 |`"rgb"` |`(2, 5,)` |`0.25` |`False` |
+|AmidarDeterministic-v0 |`"rgb"` |`4` |`0.0` |`False` |
+|AmidarNoFrameskip-v0          |`"rgb"`   |`1`         |`0.25`                      |`False`             |
+|Amidar-ram-v0 |`"ram"` |`(2, 5,)` |`0.25` |`False` |
+|Amidar-ramDeterministic-v0 |`"ram"` |`4` |`0.0` |`False` |
+|Amidar-ramNoFrameskip-v0      |`"ram"`   |`1`         |`0.25`                      |`False`             |
+
+Things change in v5: The suffixes "Deterministic" and "NoFrameskip" are no longer available. Instead, you must specify the
+environment configuration via arguments passed to `gymnasium.make`. Moreover, the v5 environments
+are in the "ALE" namespace. The suffix "-ram" is still available. Thus, we get the following table:
+
+|Name |`obs_type=`|`frameskip=`|`repeat_action_probability=`|`full_action_space=`|
+| ---------------------------- | -------- | --------- | ------------------------- | ----------------- |
+|ALE/Amidar-v5 |`"rgb"` |`5` |`0.25` |`True` |
+|ALE/Amidar-ram-v5 |`"ram"` |`5` |`0.25` |`True` |
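+
+For example (assuming the corresponding ROMs are installed):
+
+```
+import gymnasium
+
+# v0/v4 ids live in the root namespace, v5 ids in the "ALE" namespace.
+legacy = gymnasium.make("Amidar-v0")          # rgb frames, frameskip (2, 5), sticky actions
+ram_v5 = gymnasium.make("ALE/Amidar-ram-v5")  # 128-byte RAM observations
+```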
+
+### Flavors
+Some games allow the user to set a difficulty level and a game mode. Different modes/difficulties may have different
+game dynamics and (if a reduced action space is used) different action spaces. We follow the convention of [[2]](#2) and
+refer to the combination of difficulty level and game mode as a flavor of a game. The following table shows
+the available modes and difficulty levels for different Atari games:
+
+| Environment | Valid Modes | Default Mode |
+|------------------|-------------------------------------------------|----------------|
+| Adventure | `[0, 1, 2]` | `0` |
+| AirRaid | `[1, ..., 8]` | `1` |
+| Alien | `[0, ..., 3]` | `0` |
+| Amidar | `[0]` | `0` |
+| Assault | `[0]` | `0` |
+| Asterix | `[0]` | `0` |
+| Asteroids | `[0, ..., 31, 128]` | `0` |
+| Atlantis | `[0, ..., 3]` | `0` |
+| BankHeist | `[0, 4, 8, 12, 16, 20, 24, 28]` | `0` |
+| BattleZone | `[1, 2, 3]` | `1` |
+| BeamRider | `[0]` | `0` |
+| Berzerk | `[1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18]` | `1` |
+| Bowling | `[0, 2, 4]` | `0` |
+| Boxing | `[0]` | `0` |
+| Breakout | `[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44]` | `0` |
+| Carnival | `[0]` | `0` |
+| Centipede | `[22, 86]` | `22` |
+| ChopperCommand | `[0, 2]` | `0` |
+| CrazyClimber | `[0, ..., 3]` | `0` |
+| Defender | `[1, ..., 9, 16]` | `1` |
+| DemonAttack | `[1, 3, 5, 7]` | `1` |
+| DoubleDunk | `[0, ..., 15]` | `0` |
+| ElevatorAction | `[0]` | `0` |
+| Enduro | `[0]` | `0` |
+| FishingDerby | `[0]` | `0` |
+| Freeway | `[0, ..., 7]` | `0` |
+| Frostbite | `[0, 2]` | `0` |
+| Gopher | `[0, 2]` | `0` |
+| Gravitar | `[0, ..., 4]` | `0` |
+| Hero | `[0, ..., 4]` | `0` |
+| IceHockey | `[0, 2]` | `0` |
+| Jamesbond | `[0, 1]` | `0` |
+| JourneyEscape | `[0]` | `0` |
+| Kangaroo | `[0, 1]` | `0` |
+| Krull | `[0]` | `0` |
+| KungFuMaster | `[0]` | `0` |
+| MontezumaRevenge | `[0]` | `0` |
+| MsPacman | `[0, ..., 3]` | `0` |
+| NameThisGame | `[8, 24, 40]` | `8` |
+| Phoenix | `[0]` | `0` |
+| Pitfall | `[0]` | `0` |
+| Pong | `[0, 1]` | `0` |
+| Pooyan | `[10, 30, 50, 70]` | `10` |
+| PrivateEye | `[0, ..., 4]` | `0` |
+| Qbert | `[0]` | `0` |
+| Riverraid | `[0]` | `0` |
+| RoadRunner | `[0]` | `0` |
+| Robotank | `[0]` | `0` |
+| Seaquest | `[0]` | `0` |
+| Skiing | `[0]` | `0` |
+| Solaris | `[0]` | `0` |
+| SpaceInvaders | `[0, ..., 15]` | `0` |
+| StarGunner | `[0, ..., 3]` | `0` |
+| Tennis | `[0, 2]` | `0` |
+| TimePilot | `[0]` | `0` |
+| Tutankham | `[0, 4, 8, 12]` | `0` |
+| UpNDown | `[0]` | `0` |
+| Venture | `[0]` | `0` |
+| VideoPinball | `[0, 2]` | `0` |
+| WizardOfWor | `[0]` | `0` |
+| YarsRevenge | `[0, 32, 64, 96]` | `0` |
+| Zaxxon | `[0, 8, 16, 24]` | `0` |
+
+> Each game also has a valid difficulty setting for the opposing AI. Valid values range from 0 to n, where n depends on the game and can be found in [the ALE documentation](https://github.com/mgbellemare/Arcade-Learning-Environment/blob/master/docs/games.md).
+
+### References
+
+(#1)=
+[1]
+M. G. Bellemare, Y. Naddaf, J. Veness, and M. Bowling.
+"The Arcade Learning Environment: An Evaluation Platform for General Agents."
+Journal of Artificial Intelligence Research (2013).
+
+(#2)=
+[2]
+Machado et al.
+"Revisiting the Arcade Learning Environment: Evaluation Protocols
+and Open Problems for General Agents"
+Journal of Artificial Intelligence Research (2018)
+URL: https://jair.org/index.php/jair/article/view/11182
\ No newline at end of file
diff --git a/docs/environments/atari/jamesbond.md b/docs/environments/atari/jamesbond.md
new file mode 100644
index 000000000..103ea6aa5
--- /dev/null
+++ b/docs/environments/atari/jamesbond.md
@@ -0,0 +1,83 @@
+---
+title: Jamesbond
+---
+# Jamesbond
+
+```{figure} ../../_static/videos/atari/jamesbond.gif
+:width: 120px
+:name: Jamesbond
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Jamesbond-v5")` |
+
+### Description
+Your mission is to control Mr. Bond's specially designed multipurpose craft to complete a variety of missions.
+The craft moves forward with a right motion and slightly back with a left motion.
+An up or down motion causes the craft to jump or dive.
+You can also fire by either lobbing a bomb to the bottom of the screen or firing a fixed angle shot to the top of the screen.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. [The general article on Atari environments](https://brosa.ca/blog/ale-release-v0.7) outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+The game ends when you complete the last mission or when you lose the last craft. In either case, you'll receive your final score.
+There will be a rating based on your score. The highest rating in NOVICE is 006. The highest rating in AGENT is 007.
+For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=250).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Jamesbond-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Jamesbond | `[0, 1]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Jamesbond-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/journey_escape.md b/docs/environments/atari/journey_escape.md
new file mode 100644
index 000000000..dcdb14e66
--- /dev/null
+++ b/docs/environments/atari/journey_escape.md
@@ -0,0 +1,104 @@
+---
+title: JourneyEscape
+---
+# JourneyEscape
+
+```{figure} ../../_static/videos/atari/journey_escape.gif
+:width: 120px
+:name: JourneyEscape
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/JourneyEscape-v5")` |
+
+### Description
+You must lead all 5 members of JOURNEY through waves of pesky characters and backstage obstacles to the Scarab Escape Vehicle before time runs out.
+You must also protect $50,000 in concert cash from grasping groupies, photographers, and promoters.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPRIGHT |
+| 7 | UPLEFT |
+| 8 | DOWNRIGHT |
+| 9 | DOWNLEFT |
+| 11 | RIGHTFIRE |
+| 12 | LEFTFIRE |
+| 13 | DOWNFIRE |
+| 14 | UPRIGHTFIRE |
+| 15 | UPLEFTFIRE |
+| 16 | DOWNRIGHTFIRE |
+| 17 | DOWNLEFTFIRE |
+
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. [The general article on Atari environments](https://brosa.ca/blog/ale-release-v0.7) outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+At the start of the game, you will have $50,000 and 60 units of time.
+Your end game score will depend on how much time you have remaining and who you encounter along the way.
+For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=252).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/JourneyEscape-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| JourneyEscape | `[0]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("JourneyEscape-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/kangaroo.md b/docs/environments/atari/kangaroo.md
new file mode 100644
index 000000000..9fd36b4ee
--- /dev/null
+++ b/docs/environments/atari/kangaroo.md
@@ -0,0 +1,81 @@
+---
+title: Kangaroo
+---
+# Kangaroo
+
+```{figure} ../../_static/videos/atari/kangaroo.gif
+:width: 120px
+:name: Kangaroo
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Kangaroo-v5")` |
+
+### Description
+The object of the game is to score as many points as you can while controlling Mother Kangaroo to rescue her precious baby. You start the game with three lives.
+During this rescue mission, Mother Kangaroo encounters many obstacles. You need to help her climb ladders, pick bonus fruit, and throw punches at monkeys.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. [The general article on Atari environments](https://brosa.ca/blog/ale-release-v0.7) outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+Your score will be shown at the top right corner of the game.
+Your end game score will depend on how much time you have remaining and who you encounter along the way.
+For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=923).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Kangaroo-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Kangaroo | `[0, 1]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Kangaroo-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/krull.md b/docs/environments/atari/krull.md
new file mode 100644
index 000000000..40c85cfee
--- /dev/null
+++ b/docs/environments/atari/krull.md
@@ -0,0 +1,81 @@
+---
+title: Krull
+---
+# Krull
+
+```{figure} ../../_static/videos/atari/krull.gif
+:width: 120px
+:name: Krull
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Krull-v5")` |
+
+### Description
+Your mission is to find and enter the Beast's Black Fortress, rescue Princess Lyssa, and destroy the Beast.
+The task is not an easy one, for the location of the Black Fortress changes with each sunrise on Krull.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. [The general article on Atari environments](https://brosa.ca/blog/ale-release-v0.7) outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+You will receive various scores for each monster you kill.
+You can play the game until you have lost all your lives.
+For a more detailed documentation, consult [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=267).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Krull-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Krull | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Krull-v0")`.
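+
+As a sketch, the grayscale observation described above could presumably be requested directly; the `obs_type` keyword argument is an assumption taken from the general Atari article:
+
+```
+import gymnasium
+
+# Assumed keyword argument: obs_type="grayscale" replaces the RGB frames
+# with single-channel frames of shape (210, 160).
+env = gymnasium.make("ALE/Krull-v5", obs_type="grayscale")
+obs, info = env.reset()
+print(obs.shape)  # (210, 160)
+```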
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/kung_fu_master.md b/docs/environments/atari/kung_fu_master.md
new file mode 100644
index 000000000..3cd270b10
--- /dev/null
+++ b/docs/environments/atari/kung_fu_master.md
@@ -0,0 +1,85 @@
+---
+title: Kung Fu Master
+---
+# Kung Fu Master
+
+```{figure} ../../_static/videos/atari/kung_fu_master.gif
+:width: 120px
+:name: KungFuMaster
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/KungFuMaster-v5")` |
+
+### Description
+You are a Kung-Fu Master fighting your way through the Evil Wizard's temple. Your goal is to rescue Princess Victoria, defeating various enemies along the way. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=268).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | DOWNRIGHT |
+| 6 | DOWNLEFT |
+| 7 | RIGHTFIRE |
+| 8 | LEFTFIRE |
+| 9 | DOWNFIRE |
+| 10 | UPRIGHTFIRE |
+| 11 | UPLEFTFIRE |
+| 12 | DOWNRIGHTFIRE |
+| 13 | DOWNLEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/KungFuMaster-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| KungFuMaster | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("KungFuMaster-v0")`.
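+
+A small sketch of the reduced action space described above, assuming `full_action_space` is accepted by `gymnasium.make` and that the underlying ALE environment exposes `get_action_meanings()`:
+
+```
+import gymnasium
+
+# With full_action_space=False only the actions meaningful in this game remain
+# (the 14 actions listed in the table above).
+env = gymnasium.make("ALE/KungFuMaster-v5", full_action_space=False)
+print(env.action_space)                         # e.g. Discrete(14)
+print(env.unwrapped.get_action_meanings()[:3])  # e.g. ['NOOP', 'UP', 'RIGHT']
+```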
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/montezuma_revenge.md b/docs/environments/atari/montezuma_revenge.md
new file mode 100644
index 000000000..ca0a937e4
--- /dev/null
+++ b/docs/environments/atari/montezuma_revenge.md
@@ -0,0 +1,70 @@
+---
+title: Montezuma's Revenge
+---
+# Montezuma's Revenge
+
+```{figure} ../../_static/videos/atari/montezuma_revenge.gif
+:width: 120px
+:name: MontezumaRevenge
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/MontezumaRevenge-v5")` |
+
+### Description
+Your goal is to acquire Montezuma's treasure by making your way through a maze of chambers within the emperor's fortress. You must avoid deadly creatures while collecting valuables and tools which can help you escape with the treasure. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=310).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions
+will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/MontezumaRevenge-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| MontezumaRevenge | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("MontezumaRevenge-v0")`.
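+
+As a sketch of the "equivalent behavior" mentioned above, the `frameskip` and `repeat_action_probability` keyword arguments (assumptions based on the general Atari article) can presumably recreate the old deterministic, non-frameskipped variants:
+
+```
+import gymnasium
+
+# Roughly emulates the old "MontezumaRevengeNoFrameskip-v4" behavior:
+# one emulator frame per step and no sticky (repeated) actions.
+env = gymnasium.make(
+    "ALE/MontezumaRevenge-v5",
+    frameskip=1,
+    repeat_action_probability=0.0,
+)
+```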
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/ms_pacman.md b/docs/environments/atari/ms_pacman.md
new file mode 100644
index 000000000..20814c8bf
--- /dev/null
+++ b/docs/environments/atari/ms_pacman.md
@@ -0,0 +1,81 @@
+---
+title: Ms Pacman
+---
+# Ms Pacman
+
+```{figure} ../../_static/videos/atari/ms_pacman.gif
+:width: 120px
+:name: MsPacman
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/MsPacman-v5")` |
+
+### Description
+Your goal is to collect all of the pellets on the screen while avoiding the ghosts.
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPRIGHT |
+| 6 | UPLEFT |
+| 7 | DOWNRIGHT |
+| 8 | DOWNLEFT |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/MsPacman-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| MsPacman | `[0, ..., 3]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("MsPacman-v0")`.
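+
+A hedged sketch of picking one of the four game modes from the table above; the `mode` and `difficulty` keyword arguments follow the general Atari article:
+
+```
+import gymnasium
+
+# Game mode 3 at the only valid difficulty (0); modes 0, 1 and 2 work the same way.
+env = gymnasium.make("ALE/MsPacman-v5", mode=3, difficulty=0)
+obs, info = env.reset()
+```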
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/name_this_game.md b/docs/environments/atari/name_this_game.md
new file mode 100644
index 000000000..92ecad376
--- /dev/null
+++ b/docs/environments/atari/name_this_game.md
@@ -0,0 +1,78 @@
+---
+title: Name This Game
+---
+# Name This Game
+
+```{figure} ../../_static/videos/atari/name_this_game.gif
+:width: 120px
+:name: NameThisGame
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/NameThisGame-v5")` |
+
+### Description
+Your goal is to defend the treasure that you have discovered. You must fight off a shark and an octopus while keeping an eye on your oxygen supply. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=323).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | RIGHTFIRE |
+| 5 | LEFTFIRE |
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/NameThisGame-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| NameThisGame | `[8, 24, 40]` | `[0, 1]` | `8` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("NameThisGame-v0")`.
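+
+A short sketch of selecting a non-default flavor from the table above; the `mode` and `difficulty` keyword arguments follow the general Atari article:
+
+```
+import gymnasium
+
+# Valid modes are 8, 24 and 40; valid difficulties are 0 and 1 (see the table above).
+env = gymnasium.make("ALE/NameThisGame-v5", mode=24, difficulty=1)
+obs, info = env.reset()
+```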
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/phoenix.md b/docs/environments/atari/phoenix.md
new file mode 100644
index 000000000..6063ed3f0
--- /dev/null
+++ b/docs/environments/atari/phoenix.md
@@ -0,0 +1,79 @@
+---
+title: Phoenix
+---
+# Phoenix
+
+```{figure} ../../_static/videos/atari/phoenix.gif
+:width: 120px
+:name: Phoenix
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Phoenix-v5")` |
+
+### Description
+Your goal is to reach and shoot the alien pilot. On your way there, you must eliminate waves of war birds while avoiding their bombs. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=355).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | RIGHTFIRE |
+| 6 | LEFTFIRE |
+| 7 | DOWNFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Phoenix-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Phoenix | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Phoenix-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/pitfall.md b/docs/environments/atari/pitfall.md
new file mode 100644
index 000000000..ab5d60afd
--- /dev/null
+++ b/docs/environments/atari/pitfall.md
@@ -0,0 +1,77 @@
+---
+title: Pitfall
+---
+# Pitfall
+
+```{figure} ../../_static/videos/atari/pitfall.gif
+:width: 120px
+:name: Pitfall
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Pitfall-v5")` |
+
+### Description
+You control Pitfall Harry and are tasked with collecting all the treasures in a jungle within 20 minutes. You have three lives. The game ends when you collect all the treasures, lose all your lives, or run out of time.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You get score points for collecting treasure; you lose points through misfortunes such as falling down a hole.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=360).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Pitfall-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Pitfall | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pitfall-v0")`.
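+
+As a sketch, the "-ram" behavior mentioned above can presumably be obtained in v5 via a keyword argument; `obs_type` is an assumption taken from the general Atari article:
+
+```
+import gymnasium
+
+# Assumed equivalent of the old "Pitfall-ram-v4" observation space.
+env = gymnasium.make("ALE/Pitfall-v5", obs_type="ram")
+print(env.observation_space)  # Box(0, 255, (128,), uint8)
+```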
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/pong.md b/docs/environments/atari/pong.md
new file mode 100644
index 000000000..8788d5448
--- /dev/null
+++ b/docs/environments/atari/pong.md
@@ -0,0 +1,89 @@
+---
+title: Pong
+---
+# Pong
+
+```{figure} ../../_static/videos/atari/pong.gif
+:width: 120px
+:name: Pong
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Pong-v5")` |
+
+### Description
+You control the right paddle and compete against the left paddle, which is controlled by the computer. Each of you tries to deflect the ball away from your own goal and into your opponent's goal.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | RIGHTFIRE |
+| 5 | LEFTFIRE |
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You get score points for getting the ball to pass the opponent's paddle. You lose points if the ball passes your paddle.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=587).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Pong-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Pong | `[0, 1]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pong-v0")`.
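+
+A minimal, untested sketch of specifying a flavor, i.e. one of the mode/difficulty combinations from the table above:
+
+```
+import gymnasium
+
+# Mode 1 with difficulty 3; any combination of mode in [0, 1] and
+# difficulty in [0, 1, 2, 3] should be accepted (see the table above).
+env = gymnasium.make("ALE/Pong-v5", mode=1, difficulty=3)
+obs, info = env.reset()
+```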
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/pooyan.md b/docs/environments/atari/pooyan.md
new file mode 100644
index 000000000..fbc41bd98
--- /dev/null
+++ b/docs/environments/atari/pooyan.md
@@ -0,0 +1,90 @@
+---
+title: Pooyan
+---
+# Pooyan
+
+```{figure} ../../_static/videos/atari/pooyan.gif
+:width: 120px
+:name: Pooyan
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Pooyan-v5")` |
+
+### Description
+
+You are a mother pig protecting her piglets (Pooyans) from wolves. In the first scene, you can move up and down a rope. Try to shoot the wolves' balloons, while guarding yourself from attacks. If the wolves reach the ground safely, they will get behind you and try to eat you. In the second scene, the wolves try to float up. You have to try to stop them using arrows and bait. You die if a wolf eats you, or if a stone or rock hits you.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | DOWN |
+| 4 | UPFIRE |
+| 5 | DOWNFIRE |
+
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+If you hit a balloon, wolf or stone with an arrow you score points.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=372).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Pooyan-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Pooyan | `[10, 30, 50, 70]` | `[0]` | `10` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Pooyan-v0")`.
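+
+A brief sketch of selecting one of the non-default modes listed above:
+
+```
+import gymnasium
+
+# Valid modes are 10, 30, 50 and 70; 10 is the default.
+env = gymnasium.make("ALE/Pooyan-v5", mode=30)
+obs, info = env.reset()
+```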
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/private_eye.md b/docs/environments/atari/private_eye.md
new file mode 100644
index 000000000..ba6d1620e
--- /dev/null
+++ b/docs/environments/atari/private_eye.md
@@ -0,0 +1,77 @@
+---
+title: PrivateEye
+---
+# PrivateEye
+
+```{figure} ../../_static/videos/atari/private_eye.gif
+:width: 120px
+:name: PrivateEye
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/PrivateEye-v5")` |
+
+### Description
+You control the French private eye Pierre Touche. Navigate the city streets, parks, secret passages, dead-ends and one-ways in search of the ringleader, Henri Le Fiend, and his gang. You also need to find evidence and stolen goods that are scattered about. There are five cases; complete each one before its statute of limitations expires.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points for completing tasks such as gathering evidence, nabbing questionable characters, and closing cases. You lose points if you get hit or if your car drives over a pothole.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=376).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/PrivateEye-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| PrivateEye | `[0, ..., 4]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("PrivateEye-v0")`.
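+
+A sketch of combining a mode and a difficulty into a flavor, as described above:
+
+```
+import gymnasium
+
+# One of the 5 x 4 possible flavors: mode 2 (one of 0-4) at difficulty 1 (one of 0-3).
+env = gymnasium.make("ALE/PrivateEye-v5", mode=2, difficulty=1)
+obs, info = env.reset()
+```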
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/qbert.md b/docs/environments/atari/qbert.md
new file mode 100644
index 000000000..84f898b5f
--- /dev/null
+++ b/docs/environments/atari/qbert.md
@@ -0,0 +1,88 @@
+---
+title: Qbert
+---
+# Qbert
+
+```{figure} ../../_static/videos/atari/qbert.gif
+:width: 120px
+:name: Qbert
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Qbert-v5")` |
+
+### Description
+You are Q*bert. Your goal is to change the color of all the cubes on the pyramid to the pyramid's 'destination' color. To do this, you must hop on each cube on the pyramid one at a time while avoiding nasty creatures that lurk there.
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+You score points for changing the color of cubes to their destination color and for defeating enemies. You also gain points for completing a level.
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SystemID=2600&SoftwareID=1224&itemTypeID=HTMLMANUAL).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Qbert-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Qbert | `[0]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Qbert-v0")`.
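+
+A minimal sketch of choosing the harder of the two valid difficulties listed above:
+
+```
+import gymnasium
+
+# Qbert has a single mode (0) but two valid difficulties (0 and 1).
+env = gymnasium.make("ALE/Qbert-v5", difficulty=1)
+obs, info = env.reset()
+```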
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/riverraid.md b/docs/environments/atari/riverraid.md
new file mode 100644
index 000000000..4d79de80c
--- /dev/null
+++ b/docs/environments/atari/riverraid.md
@@ -0,0 +1,91 @@
+---
+title: Riverraid
+---
+# Riverraid
+
+```{figure} ../../_static/videos/atari/riverraid.gif
+:width: 120px
+:name: Riverraid
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Riverraid-v5")` |
+
+### Description
+You control a jet that flies over a river: you can move it sideways and fire missiles to destroy enemy objects. Each time an enemy object is destroyed you score points (i.e. rewards).
+
+You lose a jet when you run out of fuel: fly over a fuel depot when you begin to run low.
+
+You also lose a jet when it collides with the river bank or with one of the enemy objects (except fuel depots).
+
+The game begins with a squadron of three jets in reserve and you're given an additional jet (up to 9) for each 10,000 points you score.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Score points are your only reward. You get score points each time you destroy an enemy object:
+
+| Enemy Object | Score Points |
+|--------------|--------------|
+| Tanker | 30|
+| Helicopter | 60|
+| Fuel Depot | 80|
+| Jet | 100|
+| Bridge | 500|
+
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=409).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Riverraid-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-----------|--------------------|--------------|
+| Riverraid | `[0]` | `[0,1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Riverraid-v0")`.
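+
+A sketch combining a difficulty setting with the RAM observation space; the `obs_type` keyword argument is an assumption taken from the general Atari article:
+
+```
+import gymnasium
+
+# Difficulty 1 (valid values are 0 and 1) with 128-byte RAM observations.
+env = gymnasium.make("ALE/Riverraid-v5", difficulty=1, obs_type="ram")
+print(env.observation_space)  # Box(0, 255, (128,), uint8)
+```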
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/road_runner.md b/docs/environments/atari/road_runner.md
new file mode 100644
index 000000000..4d487fa1f
--- /dev/null
+++ b/docs/environments/atari/road_runner.md
@@ -0,0 +1,93 @@
+---
+title: Road Runner
+---
+# Road Runner
+
+```{figure} ../../_static/videos/atari/road_runner.gif
+:width: 120px
+:name: RoadRunner
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/RoadRunner-v5")` |
+
+### Description
+You control the Road Runner(TM) in a race; you can control the direction in which it runs and when it jumps.
+The goal is to outrun Wile E. Coyote(TM) while avoiding the hazards of the desert.
+
+The game begins with three lives. You lose a life when the coyote
+catches you, picks you up in a rocket, or shoots you with a cannon. You also
+lose a life when a truck hits you, you hit a land mine, you fall off a cliff,
+or you get hit by a falling rock.
+
+You score points (i.e. rewards) by eating seeds along the road, eating steel shot, and
+destroying the coyote.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Score points are your only reward. You get score points each time you:
+
+| Actions | Points |
+|-----------------------------------------------------------|-----|
+|eat a pile of birdseed |100 |
+|eat steel shot |100 |
+|get the coyote hit by a mine (cannonball, rock, etc.) |200 |
+|get the coyote hit by a truck |1000 |
+
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=412).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/RoadRunner-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------------|------|----------|--------------|
+| RoadRunner | `[0]`| `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("RoadRunner-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/robotank.md b/docs/environments/atari/robotank.md
new file mode 100644
index 000000000..0179de9e1
--- /dev/null
+++ b/docs/environments/atari/robotank.md
@@ -0,0 +1,92 @@
+---
+title: Robot Tank
+---
+# Robot Tank
+
+```{figure} ../../_static/videos/atari/robotank.gif
+:width: 120px
+:name: Robotank
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Robotank-v5")` |
+
+### Description
+You control your Robot Tanks to destroy enemies and avoid enemy fire.
+
+The game ends when all of your Robot Tanks are destroyed or when all 12 enemy squadrons are destroyed.
+
+The game begins with one active Robot Tank and three in reserve.
+Your Robot Tank may be lost when it is hit by enemy rocket fire (your video scrambles with static interference when this happens) or it may just become damaged; sensors report the damage by flashing on your control panel (look at the V/C/R/T squares).
+
+You earn one bonus Robot Tank for every enemy squadron destroyed. The maximum number of bonus Robot Tanks allowed at any one time is 12.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+The number of enemies destroyed is the only reward.
+
+A small tank appears at the top of your screen for each enemy
+you destroy. A square with the number 12 appears each time a squadron of twelve enemies is destroyed.
+
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=416).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Robotank-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------|--------------------|--------------|
+| Robotank | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Robotank-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/seaquest.md b/docs/environments/atari/seaquest.md
new file mode 100644
index 000000000..b5d63c8af
--- /dev/null
+++ b/docs/environments/atari/seaquest.md
@@ -0,0 +1,102 @@
+---
+title: Seaquest
+---
+# Seaquest
+
+```{figure} ../../_static/videos/atari/seaquest.gif
+:width: 120px
+:name: Seaquest
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Seaquest-v5")` |
+
+### Description
+You control a sub able to move in all directions and fire torpedoes.
+The goal is to retrieve as many divers as you
+can, while dodging and blasting enemy subs and killer sharks; points will be awarded accordingly.
+
+The game begins with one sub and three waiting on the horizon. Each time you
+increase your score by 10,000 points, an extra sub will be delivered to your
+base. You can only have six reserve subs on the screen at one time.
+
+Your sub will explode if it collides with anything
+except your own divers.
+
+The sub has a limited amount of oxygen that decreases at a constant rate during the game. When the oxygen
+tank is almost empty, you need to surface, and if you don't do it in
+time, your sub will blow up and you'll lose one diver. Each time you're forced
+to surface with fewer than six divers, you also lose one diver.
+
+Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box(0, 255, (128,), uint8)`
+- `Box(0, 255, (210, 160), uint8)`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Score points are your only reward.
+
+Blasting an enemy sub or killer shark is worth
+20 points. Every time you surface with six divers, the value of enemy subs
+and killer sharks increases by 10, up to a maximum of 90 points each.
+
+Rescued divers start at 50 points each. Their point value then increases by 50 every
+time you surface, up to a maximum of 1000 points each.
+
+You'll be further rewarded with bonus points for all the oxygen you have remaining the
+moment you surface. The more oxygen you have left, the more bonus points
+you're given.
+
+For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=424).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Seaquest-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|------------------------------|---------------|--------------|
+| Seaquest | `[0]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Seaquest-v0")`.
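+
+As an illustrative, untested sketch, a short random rollout shows where the score points described above appear: they arrive as the `reward` returned by `env.step`:
+
+```
+import gymnasium
+
+# Random rollout; requires ale-py and the Atari ROMs to be installed.
+env = gymnasium.make("ALE/Seaquest-v5", difficulty=1)
+obs, info = env.reset(seed=0)
+total = 0.0
+for _ in range(1000):
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    total += reward
+    if terminated or truncated:
+        break
+env.close()
+print(total)  # accumulated score points of the random episode
+```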
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/skiing.md b/docs/environments/atari/skiing.md
new file mode 100644
index 000000000..3c048ac3f
--- /dev/null
+++ b/docs/environments/atari/skiing.md
@@ -0,0 +1,93 @@
+---
+title: Skiing
+---
+# Skiing
+
+```{figure} ../../_static/videos/atari/skiing.gif
+:width: 120px
+:name: Skiing
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Skiing-v5")` |
+
+### Description
+You control a skier who can move sideways.
+
+The goal is to run through all gates (between the poles) in the fastest time.
+You are penalized five seconds for each gate you miss.
+
+If you hit a gate or a tree, your skier will jump back up
+and keep going. But you do lose time, so be careful!
+
+Detailed documentation can be found on [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend
+on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------|
+| 0 | NOOP |
+| 1 | RIGHT |
+| 2 | LEFT |
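+
+A minimal sketch of how the two action-space variants can be compared, using the `full_action_space` keyword named above (the printed values are expectations, not guarantees, and depend on your installed ALE version):
+
+```python
+import gymnasium
+
+reduced = gymnasium.make("ALE/Skiing-v5", full_action_space=False)
+full = gymnasium.make("ALE/Skiing-v5", full_action_space=True)
+
+print(reduced.action_space)  # expected: Discrete(3) -> NOOP, RIGHT, LEFT
+print(full.action_space)     # expected: Discrete(18)
+```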
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+Elapsed seconds are your only reward, assigned as negative rewards; penalties (e.g. missing a gate) are added as extra seconds.
+
+For a more detailed documentation, see [the AtariAge page [SLALOM RACING section]](https://atariage.com/manual_html_page.php?SoftwareLabelID=434).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Skiing-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|------------------------------|---------------|--------------|
+| Skiing | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Skiing-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/solaris.md b/docs/environments/atari/solaris.md
new file mode 100644
index 000000000..420822a14
--- /dev/null
+++ b/docs/environments/atari/solaris.md
@@ -0,0 +1,75 @@
+---
+title: Solaris
+---
+# Solaris
+
+```{figure} ../../_static/videos/atari/solaris.gif
+:width: 120px
+:name: Solaris
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Solaris-v5")` |
+
+### Description
+You control a spaceship. Blast enemies before they can blast you. You can warp to different sectors. You have to defend Federation planets, and destroy Zylon forces. Keep track of your fuel; if you run out, you lose a life. Warp to a Federation planet to refuel. The game ends if all your ships are destroyed or if you reach the Solaris planet. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+
+You gain points for destroying enemies, rescuing cadets, making it through a corridor, destroying enemy planets etc. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=450).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Solaris-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Solaris | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Solaris-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/space_invaders.md b/docs/environments/atari/space_invaders.md
new file mode 100644
index 000000000..646ed36c8
--- /dev/null
+++ b/docs/environments/atari/space_invaders.md
@@ -0,0 +1,88 @@
+---
+title: SpaceInvaders
+---
+# SpaceInvaders
+
+```{figure} ../../_static/videos/atari/space_invaders.gif
+:width: 120px
+:name: SpaceInvaders
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/SpaceInvaders-v5")` |
+
+### Description
+
+Your objective is to destroy the space invaders by shooting your laser cannon at them before they reach the Earth. The game ends when all your lives are lost after taking enemy fire, or when the invaders reach the Earth. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | RIGHTFIRE |
+| 5 | LEFTFIRE |
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+
+You gain points for destroying space invaders. The invaders in the back rows are worth more points. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=460).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/SpaceInvaders-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| SpaceInvaders | `[0, ..., 15]` | `[0, 1]` | `0` |
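+
+Since this game exposes several modes and two difficulties, a flavor can be selected with the keyword arguments named above. A sketch (any valid pair from the table works):
+
+```python
+import gymnasium
+
+# Game mode 1 at difficulty 1; valid values are listed in the table above.
+env = gymnasium.make("ALE/SpaceInvaders-v5", mode=1, difficulty=1)
+```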
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("SpaceInvaders-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/star_gunner.md b/docs/environments/atari/star_gunner.md
new file mode 100644
index 000000000..8267d2877
--- /dev/null
+++ b/docs/environments/atari/star_gunner.md
@@ -0,0 +1,89 @@
+---
+title: StarGunner
+---
+# StarGunner
+
+```{figure} ../../_static/videos/atari/star_gunner.gif
+:width: 120px
+:name: StarGunner
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/StarGunner-v5")` |
+
+### Description
+
+Stop the alien invasion by shooting down alien saucers and creatures while avoiding bombs. More details can be found on [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+
+You score points for destroying enemies. You get bonus points for clearing a wave and a level. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-stargunner_16921.html).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/StarGunner-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| StarGunner | `[0, ..., 3]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("StarGunner-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/tennis.md b/docs/environments/atari/tennis.md
new file mode 100644
index 000000000..bb635d7f3
--- /dev/null
+++ b/docs/environments/atari/tennis.md
@@ -0,0 +1,78 @@
+---
+title: Tennis
+---
+# Tennis
+
+```{figure} ../../_static/videos/atari/tennis.gif
+:width: 120px
+:name: Tennis
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Tennis-v5")` |
+
+### Description
+
+You control the orange player playing against a computer-controlled blue player. The game follows the rules of tennis.
+The first player to win at least 6 games with a margin of at least two games wins the match. If the score is tied at 6-6, the first player to go 2 games up wins the match. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+Even if you use v0 or v4 or specify `full_action_space=False` during initialization, all actions will be available in the default flavor.
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Rewards
+
+The scoring is as per the sport of tennis, played till one set. For a more detailed documentation, see [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=555).
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Tennis-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Tennis | `[0, 2]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Tennis-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/time_pilot.md b/docs/environments/atari/time_pilot.md
new file mode 100644
index 000000000..48c63d906
--- /dev/null
+++ b/docs/environments/atari/time_pilot.md
@@ -0,0 +1,93 @@
+---
+title: TimePilot
+---
+# TimePilot
+
+```{figure} ../../_static/videos/atari/time_pilot.gif
+:width: 120px
+:name: TimePilot
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/TimePilot-v5")` |
+
+### Description
+
+You control an aircraft. Use it to destroy your enemies. As you progress in the game, you encounter enemies with technology that is increasingly from the future. More details can be found on [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html)
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment.
+However, if you use v0 or v4 or specify `full_action_space=False` during initialization, only a reduced
+number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of `mode` and `difficulty`). The reduced action space for the default
+flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPFIRE |
+| 7 | RIGHTFIRE |
+| 8 | LEFTFIRE |
+| 9 | DOWNFIRE |
+
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+respectively. The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+
+### Rewards
+
+You score points for destroying enemies, gaining more points for difficult enemies. For a more detailed documentation, see [the Atari Mania page](http://www.atarimania.com/game-atari-2600-vcs-time-pilot_8038.html).
+
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/TimePilot-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| TimePilot | `[0]` | `[0, 1, 2]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("TimePilot-v0")`.
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
+
diff --git a/docs/environments/atari/tutankham.md b/docs/environments/atari/tutankham.md
new file mode 100644
index 000000000..dabeaef5d
--- /dev/null
+++ b/docs/environments/atari/tutankham.md
@@ -0,0 +1,79 @@
+---
+title: Tutankham
+---
+# Tutankham
+
+```{figure} ../../_static/videos/atari/tutankham.gif
+:width: 120px
+:name: Tutankham
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Tutankham-v5")` |
+
+### Description
+Your goal is to rack up points by finding treasures in the mazes of the tomb while eliminating its guardians. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_thumbs.php?SoftwareLabelID=572).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | UP |
+| 2 | RIGHT |
+| 3 | LEFT |
+| 4 | DOWN |
+| 5 | UPFIRE |
+| 6 | RIGHTFIRE |
+| 7 | LEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Tutankham-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Tutankham | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Tutankham-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/up_n_down.md b/docs/environments/atari/up_n_down.md
new file mode 100644
index 000000000..bf5376581
--- /dev/null
+++ b/docs/environments/atari/up_n_down.md
@@ -0,0 +1,78 @@
+---
+title: Up n' Down
+---
+# Up n' Down
+
+```{figure} ../../_static/videos/atari/up_n_down.gif
+:width: 120px
+:name: UpNDown
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/UpNDown-v5")` |
+
+### Description
+Your goal is to steer your baja bugger to collect prizes and eliminate opponents. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=574).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | DOWN |
+| 4 | UPFIRE |
+| 5 | DOWNFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/UpNDown-v5")
+```
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| UpNDown | `[0]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("UpNDown-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/venture.md b/docs/environments/atari/venture.md
new file mode 100644
index 000000000..970c56fa9
--- /dev/null
+++ b/docs/environments/atari/venture.md
@@ -0,0 +1,70 @@
+---
+title: Venture
+---
+# Venture
+
+```{figure} ../../_static/videos/atari/venture.gif
+:width: 120px
+:name: Venture
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Venture-v5")` |
+
+### Description
+Your goal is to capture the treasure in every chamber of the dungeon while eliminating the monsters. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=576).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify full_action_space=False during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Venture-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Venture | `[0]` | `[0, ..., 3]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Venture-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/video_pinball.md b/docs/environments/atari/video_pinball.md
new file mode 100644
index 000000000..e1364d905
--- /dev/null
+++ b/docs/environments/atari/video_pinball.md
@@ -0,0 +1,82 @@
+---
+title: Video Pinball
+---
+# Video Pinball
+
+```{figure} ../../_static/videos/atari/video_pinball.gif
+:width: 120px
+:name: VideoPinball
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/VideoPinball-v5")` |
+
+### Description
+Your goal is to keep the ball in play as long as possible and to score as many points as possible. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=588).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPFIRE |
+| 7 | RIGHTFIRE |
+| 8 | LEFTFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/VideoPinball-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| VideoPinball | `[0, ..., 2]` | `[0, 1]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("VideoPinball-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/wizard_of_wor.md b/docs/environments/atari/wizard_of_wor.md
new file mode 100644
index 000000000..05e189b8a
--- /dev/null
+++ b/docs/environments/atari/wizard_of_wor.md
@@ -0,0 +1,83 @@
+---
+title: Wizard of Wor
+---
+# Wizard of Wor
+
+```{figure} ../../_static/videos/atari/wizard_of_wor.gif
+:width: 120px
+:name: WizardOfWor
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/WizardOfWor-v5")` |
+
+### Description
+Your goal is to beat the Wizard using your laser and radar scanner. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=598).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. However, if you use v0 or v4 or specify full_action_space=False during initialization, only a reduced number of actions (those that are meaningful in this game) are available. The reduced action space may depend on the flavor of the environment (the combination of mode and difficulty). The reduced action space for the default flavor looks like this:
+
+| Num | Action |
+|-----|------------------------|
+| 0 | NOOP |
+| 1 | FIRE |
+| 2 | UP |
+| 3 | RIGHT |
+| 4 | LEFT |
+| 5 | DOWN |
+| 6 | UPFIRE |
+| 7 | RIGHTFIRE |
+| 8 | LEFTFIRE |
+| 9 | DOWNFIRE |
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/WizardOfWor-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+It is possible to specify various flavors of the environment via the keyword arguments `difficulty` and `mode`.
+A flavor is a combination of a game mode and a difficulty setting.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| WizardOfWor      | `[0]`       | `[0, 1]`           | `0`          |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("WizardOfWor-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/atari/zaxxon.md b/docs/environments/atari/zaxxon.md
new file mode 100644
index 000000000..ac1e52c99
--- /dev/null
+++ b/docs/environments/atari/zaxxon.md
@@ -0,0 +1,69 @@
+---
+title: Zaxxon
+lastpage:
+---
+# Zaxxon
+
+```{figure} ../../_static/videos/atari/zaxxon.gif
+:width: 120px
+:name: Zaxxon
+```
+
+This environment is part of the Atari environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(18) |
+| Observation Space | (210, 160, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("ALE/Zaxxon-v5")` |
+
+### Description
+Your goal is to stop the evil robot Zaxxon and its armies from enslaving the galaxy by piloting your fighter and shooting enemies. Detailed documentation can be found on [the AtariAge page](https://atariage.com/manual_html_page.php?SoftwareLabelID=606).
+
+### Actions
+By default, all actions that can be performed on an Atari 2600 are available in this environment. Even if you use v0 or v4 or specify full_action_space=False during initialization, all actions will be available in the default flavor.
+
+### Observations
+By default, the environment returns the RGB image that is displayed to human players as an observation. However, it is
+possible to observe
+- The 128 Bytes of RAM of the console
+- A grayscale image
+
+instead. The respective observation spaces are
+- `Box([0 ... 0], [255 ... 255], (128,), uint8)`
+- `Box([[0 ... 0]
+ ...
+ [0 ... 0]], [[255 ... 255]
+ ...
+ [255 ... 255]], (210, 160), uint8)
+`
+
+The general article on Atari environments outlines different ways to instantiate corresponding environments
+via `gymnasium.make`.
+
+### Arguments
+
+```
+env = gymnasium.make("ALE/Zaxxon-v5")
+```
+
+The various ways to configure the environment are described in detail in the article on Atari environments.
+
+| Environment | Valid Modes | Valid Difficulties | Default Mode |
+|------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|--------------|
+| Zaxxon | `[0]` | `[0]` | `0` |
+
+You may use the suffix "-ram" to switch to the RAM observation space. In v0 and v4, the suffixes "Deterministic" and "NoFrameskip"
+are available. These are no longer supported in v5. In order to obtain equivalent behavior, pass keyword arguments to `gymnasium.make` as outlined in
+the general article on Atari environments.
+The versions v0 and v4 are not contained in the "ALE" namespace. I.e. they are instantiated via `gymnasium.make("Zaxxon-v0")`
+
+### Version History
+A thorough discussion of the intricate differences between the versions and configurations can be found in the
+general article on Atari environments.
+
+* v5: Stickiness was added back and stochastic frameskipping was removed. The entire action space is used by default. The environments are now in the "ALE" namespace.
+* v4: Stickiness of actions was removed
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/box2d/bipedal_walker.md b/docs/environments/box2d/bipedal_walker.md
new file mode 100644
index 000000000..762a51f7f
--- /dev/null
+++ b/docs/environments/box2d/bipedal_walker.md
@@ -0,0 +1,83 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Bipedal Walker
+firstpage:
+---
+
+# Bipedal Walker
+
+```{figure} ../../_static/videos/box2d/bipedal_walker.gif
+:width: 200px
+:name: bipedal_walker
+```
+
+This environment is part of the Box2D environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (4,), float32) |
+| Observation Shape | (24,) |
+| Observation High | [3.14 5. 5. 5. 3.14 5. 3.14 5. 5. 3.14 5. 3.14 5. 5. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ] |
+| Observation Low | [-3.14 -5. -5. -5. -3.14 -5. -3.14 -5. -0. -3.14 -5. -3.14 -5. -0. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. ] |
+| Import | `gymnasium.make("BipedalWalker-v3")` |
+
+
+### Description
+This is a simple 4-joint walker robot environment.
+There are two versions:
+- Normal, with slightly uneven terrain.
+- Hardcore, with ladders, stumps, pitfalls.
+
+To solve the normal version, you need to get 300 points in 1600 time steps.
+To solve the hardcore version, you need 300 points in 2000 time steps.
+
+A heuristic is provided for testing. It's also useful to get demonstrations
+to learn from. To run the heuristic:
+```
+python gymnasium/envs/box2d/bipedal_walker.py
+```
+
+### Action Space
+Actions are motor speed values in the [-1, 1] range for each of the
+4 joints at both hips and knees.
+
+### Observation Space
+State consists of hull angle speed, angular velocity, horizontal speed,
+vertical speed, position of joints and joints angular speed, legs contact
+with ground, and 10 lidar rangefinder measurements. There are no coordinates
+in the state vector.
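+
+A minimal interaction loop, useful for checking the shapes described above (this assumes the standard Gymnasium API in which `step` returns a five-tuple):
+
+```python
+import gymnasium
+
+env = gymnasium.make("BipedalWalker-v3")
+obs, info = env.reset(seed=0)
+print(obs.shape)  # expected: (24,)
+
+for _ in range(100):
+    action = env.action_space.sample()  # random motor speeds in [-1, 1]
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, info = env.reset()
+env.close()
+```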
+
+### Rewards
+Reward is given for moving forward, totaling 300+ points up to the far end.
+If the robot falls, it gets -100. Applying motor torque costs a small
+amount of points. A more optimal agent will get a better score.
+
+### Starting State
+The walker starts standing at the left end of the terrain with the hull
+horizontal, and both legs in the same position with a slight knee angle.
+
+### Episode Termination
+The episode will terminate if the hull gets in contact with the ground or
+if the walker exceeds the right end of the terrain length.
+
+### Arguments
+To use the _hardcore_ environment, you need to specify the
+`hardcore=True` argument like below:
+```python
+import gymnasium
+env = gymnasium.make("BipedalWalker-v3", hardcore=True)
+```
+
+### Version History
+- v3: returns closest lidar trace instead of furthest;
+ faster video recording
+- v2: Count energy spent
+- v1: Legs now report contact with ground; motors have higher torque and
+ speed; ground has higher friction; lidar rendered less nervously.
+- v0: Initial version
+
+
+
+
+### Credits
+Created by Oleg Klimov
diff --git a/docs/environments/box2d/car_racing.md b/docs/environments/box2d/car_racing.md
new file mode 100644
index 000000000..8c039c4b5
--- /dev/null
+++ b/docs/environments/box2d/car_racing.md
@@ -0,0 +1,97 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Car Racing
+---
+
+# Car Racing
+
+```{figure} ../../_static/videos/box2d/car_racing.gif
+:width: 200px
+:name: car_racing
+```
+
+This environment is part of the Box2D environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box([-1. 0. 0.], 1.0, (3,), float32) |
+| Observation Shape | (96, 96, 3) |
+| Observation High | 255 |
+| Observation Low | 0 |
+| Import | `gymnasium.make("CarRacing-v2")` |
+
+
+### Description
+The easiest control task to learn from pixels - a top-down
+racing environment. The generated track is random every episode.
+
+Some indicators are shown at the bottom of the window along with the
+state RGB buffer. From left to right: true speed, four ABS sensors,
+steering wheel position, and gyroscope.
+To play yourself (it's rather fast for humans), type:
+```
+python gymnasium/envs/box2d/car_racing.py
+```
+Remember: it's a powerful rear-wheel drive car - don't press the accelerator
+and turn at the same time.
+
+### Action Space
+If continuous:
+ There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking.
+If discrete:
+ There are 5 actions: do nothing, steer left, steer right, gas, brake.
+
+### Observation Space
+State consists of 96x96 pixels.
+
+### Rewards
+The reward is -0.1 every frame and +1000/N for every track tile visited,
+where N is the total number of tiles visited in the track. For example,
+if you have finished in 732 frames, your reward is
+1000 - 0.1*732 = 926.8 points.
+
+### Starting State
+The car starts at rest in the center of the road.
+
+### Episode Termination
+The episode finishes when all of the tiles are visited. The car can also go
+outside of the playfield - that is, far off the track, in which case it will
+receive -100 reward and die.
+
+### Arguments
+`lap_complete_percent` dictates the percentage of tiles that must be visited by
+the agent before a lap is considered complete.
+
+Passing `domain_randomize=True` enables the domain randomized variant of the environment.
+In this scenario, the background and track colours are different on every reset.
+
+Passing `continuous=False` converts the environment to use discrete action space.
+The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
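+
+A sketch combining the arguments above (the values shown are illustrative choices, not defaults):
+
+```python
+import gymnasium
+
+env = gymnasium.make(
+    "CarRacing-v2",
+    continuous=False,           # discrete 5-action space
+    domain_randomize=True,      # randomize background/track colours on reset
+    lap_complete_percent=0.95,  # fraction of tiles required for a completed lap
+)
+```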
+
+### Reset Arguments
+Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
+Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
+`domain_randomize` must be `True` on init for this argument to work.
+Example usage:
+```py
+ env = gymnasium.make("CarRacing-v2", domain_randomize=True)
+
+ # normal reset, this changes the colour scheme by default
+ env.reset()
+
+ # reset with colour scheme change
+ env.reset(options={"randomize": True})
+
+ # reset with no colour scheme change
+ env.reset(options={"randomize": False})
+```
+
+### Version History
+- v1: Change track completion logic and add domain randomization (0.24.0)
+- v0: Original version
+
+### References
+- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
+
+### Credits
+Created by Oleg Klimov
diff --git a/docs/environments/box2d/index.html b/docs/environments/box2d/index.html
new file mode 100644
index 000000000..1a16f1c17
--- /dev/null
+++ b/docs/environments/box2d/index.html
@@ -0,0 +1,41 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/box2d/index.md b/docs/environments/box2d/index.md
new file mode 100644
index 000000000..bfffe20a0
--- /dev/null
+++ b/docs/environments/box2d/index.md
@@ -0,0 +1,26 @@
+---
+firstpage:
+lastpage:
+---
+
+## Box2D
+
+```{toctree}
+:hidden:
+
+bipedal_walker
+car_racing
+lunar_lander
+```
+
+```{raw} html
+ :file: index.html
+```
+
+These environments all involve toy games based around physics control, using [box2d](https://box2d.org/) based physics and PyGame based rendering. These environments were contributed back in the early days of Gymnasium by Oleg Klimov, and have become popular toy benchmarks ever since. All environments are highly configurable via arguments specified in each environment's documentation.
+
+The unique dependencies for this set of environments can be installed via:
+
+````bash
+pip install gymnasium[box2d]
+````
diff --git a/docs/environments/box2d/lunar_lander.md b/docs/environments/box2d/lunar_lander.md
new file mode 100644
index 000000000..861cfec4f
--- /dev/null
+++ b/docs/environments/box2d/lunar_lander.md
@@ -0,0 +1,125 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Lunar Lander
+lastpage:
+---
+
+# Lunar Lander
+
+```{figure} ../../_static/videos/box2d/lunar_lander.gif
+:width: 200px
+:name: lunar_lander
+```
+
+This environment is part of the Box2D environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(4) |
+| Observation Shape | (8,) |
+| Observation High | [1.5 1.5 5. 5. 3.14 5. 1. 1. ] |
+| Observation Low | [-1.5 -1.5 -5. -5. -3.14 -5. -0. -0. ] |
+| Import | `gymnasium.make("LunarLander-v2")` |
+
+
+### Description
+This environment is a classic rocket trajectory optimization problem.
+According to Pontryagin's maximum principle, it is optimal to fire the
+engine at full throttle or turn it off. This is the reason why this
+environment has discrete actions: engine on or off.
+
+There are two environment versions: discrete or continuous.
+The landing pad is always at coordinates (0,0). The coordinates are the
+first two numbers in the state vector.
+Landing outside of the landing pad is possible. Fuel is infinite, so an agent
+can learn to fly and then land on its first attempt.
+
+To see a heuristic landing, run:
+```
+python gymnasium/envs/box2d/lunar_lander.py
+```
+
+
+
+### Action Space
+There are four discrete actions available: do nothing, fire left
+orientation engine, fire main engine, fire right orientation engine.
+
+### Observation Space
+The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`, its linear
+velocities in `x` & `y`, its angle, its angular velocity, and two booleans
+that represent whether each leg is in contact with the ground or not.
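+
+For readability, the 8 entries can be unpacked in the order listed above (the variable names, including the leg ordering, are only illustrative):
+
+```python
+import gymnasium
+
+env = gymnasium.make("LunarLander-v2")
+obs, info = env.reset(seed=0)
+# x/y position, x/y velocity, angle, angular velocity, two leg-contact flags
+x, y, vx, vy, angle, angular_velocity, leg1_contact, leg2_contact = obs
+```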
+
+### Rewards
+Reward for moving from the top of the screen to the landing pad and coming
+to rest is about 100-140 points.
+If the lander moves away from the landing pad, it loses reward.
+If the lander crashes, it receives an additional -100 points. If it comes
+to rest, it receives an additional +100 points. Each leg with ground
+contact is +10 points.
+Firing the main engine is -0.3 points each frame. Firing the side engine
+is -0.03 points each frame. Solved is 200 points.
+
+### Starting State
+The lander starts at the top center of the viewport with a random initial
+force applied to its center of mass.
+
+### Episode Termination
+The episode finishes if:
+1) the lander crashes (the lander body gets in contact with the moon);
+2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
+3) the lander is not awake. From the [Box2D docs](https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61),
+ a body which is not awake is a body which doesn't move and doesn't
+ collide with any other body:
+> When Box2D determines that a body (or group of bodies) has come to rest,
+> the body enters a sleep state which has very little CPU overhead. If a
+> body is awake and collides with a sleeping body, then the sleeping body
+> wakes up. Bodies will also wake up if a joint or contact attached to
+> them is destroyed.
+
+### Arguments
+To use the _continuous_ environment, you need to specify the
+`continuous=True` argument like below:
+```python
+import gymnasium
+env = gymnasium.make(
+ "LunarLander-v2",
+    continuous=False,
+    gravity=-10.0,
+    enable_wind=False,
+    wind_power=15.0,
+    turbulence_power=1.5,
+)
+```
+If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
+action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
+The first coordinate of an action determines the throttle of the main engine, while the second
+coordinate specifies the throttle of the lateral boosters.
+Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
+`main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
+main engine doesn't work with less than 50% power).
+Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
+booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
+from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
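+
+The mapping described above can be summarised in a small helper (a sketch of the described behaviour, not the environment's internal code):
+
+```python
+def engine_throttles(main, lateral):
+    """Return (main_throttle, lateral_throttle) in [0, 1] per the rules above."""
+    main_throttle = 0.0 if main < 0 else 0.5 + 0.5 * min(main, 1.0)
+    if abs(lateral) <= 0.5:
+        lateral_throttle = 0.0                     # boosters off in the dead zone
+    else:
+        lateral_throttle = min(abs(lateral), 1.0)  # 50% at 0.5, 100% at 1.0
+    # lateral < -0.5 fires the left booster, lateral > 0.5 fires the right one.
+    return main_throttle, lateral_throttle
+```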
+
+`gravity` dictates the gravitational constant; it is bounded to be within 0 and -12.
+
+If `enable_wind=True` is passed, there will be wind effects applied to the lander.
+The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
+`k` is set to 0.01.
+`C` is sampled randomly between -9999 and 9999.
+
+`wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
+`turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
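+
+For reference, the wind shape quoted above can be written out directly (a sketch, with `k = 0.01` and `C` sampled once as stated; the scaling by `wind_power` is applied inside the environment):
+
+```python
+import numpy as np
+
+def wind_shape(t, C, k=0.01):
+    # tanh(sin(2 k (t + C)) + sin(pi k (t + C))), bounded in (-1, 1)
+    return np.tanh(np.sin(2 * k * (t + C)) + np.sin(np.pi * k * (t + C)))
+```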
+
+### Version History
+- v2: Count energy spent and, in v0.24, added turbulence with the `wind_power` and `turbulence_power` parameters
+- v1: Legs contact with ground added in state vector; contact with ground
+  gives +10 reward points, and -10 if contact is then lost; reward
+  renormalized to 200; harder initial random push.
+- v0: Initial version
+
+
+
+### Credits
+Created by Oleg Klimov
diff --git a/docs/environments/classic_control/acrobot.md b/docs/environments/classic_control/acrobot.md
new file mode 100644
index 000000000..5c5269546
--- /dev/null
+++ b/docs/environments/classic_control/acrobot.md
@@ -0,0 +1,132 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Acrobot
+firstpage:
+---
+
+# Acrobot
+
+```{figure} ../../_static/videos/classic_control/acrobot.gif
+:width: 200px
+:name: acrobot
+```
+
+This environment is part of the Classic Control environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(3) |
+| Observation Shape | (6,) |
+| Observation High | [ 1. 1. 1. 1. 12.57 28.27] |
+| Observation Low | [ -1. -1. -1. -1. -12.57 -28.27] |
+| Import | `gymnasium.make("Acrobot-v1")` |
+
+
+### Description
+
+The Acrobot environment is based on Sutton's work in
+["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html)
+and [Sutton and Barto's book](http://www.incompleteideas.net/book/the-book-2nd.html).
+The system consists of two links connected linearly to form a chain, with one end of
+the chain fixed. The joint between the two links is actuated. The goal is to apply
+torques on the actuated joint to swing the free end of the linear chain above a
+given height while starting from the initial state of hanging downwards.
+
+As seen in the **Gif**: two blue links connected by two green joints. The joint in
+between the two links is actuated. The goal is to swing the free end of the outer-link
+to reach the target height (black horizontal line above system) by applying torque on
+the actuator.
+
+### Action Space
+
+The action is discrete, deterministic, and represents the torque applied on the actuated
+joint between the two links.
+
+| Num | Action | Unit |
+|-----|---------------------------------------|--------------|
+| 0 | apply -1 torque to the actuated joint | torque (N m) |
+| 1 | apply 0 torque to the actuated joint | torque (N m) |
+| 2 | apply 1 torque to the actuated joint | torque (N m) |
+
+### Observation Space
+
+The observation is a `ndarray` with shape `(6,)` that provides information about the
+two rotational joint angles as well as their angular velocities:
+
+| Num | Observation | Min | Max |
+|-----|------------------------------|---------------------|-------------------|
+| 0 | Cosine of `theta1` | -1 | 1 |
+| 1 | Sine of `theta1` | -1 | 1 |
+| 2 | Cosine of `theta2` | -1 | 1 |
+| 3 | Sine of `theta2` | -1 | 1 |
+| 4 | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
+| 5 | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
+
+where
+- `theta1` is the angle of the first joint, where an angle of 0 indicates the first link is pointing directly
+downwards.
+- `theta2` is ***relative to the angle of the first link.***
+ An angle of 0 corresponds to having the same angle between the two links.
+
+The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
+A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.
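+
+Since the joint angles only appear through their sine and cosine, they can be recovered from an observation if needed (a small illustrative helper, not part of the environment's API):
+
+```python
+import numpy as np
+
+def joint_angles(obs):
+    """Recover `theta1` and `theta2` (in radians) from an Acrobot observation."""
+    cos_t1, sin_t1, cos_t2, sin_t2 = obs[:4]
+    theta1 = np.arctan2(sin_t1, cos_t1)
+    theta2 = np.arctan2(sin_t2, cos_t2)
+    return theta1, theta2
+```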
+
+### Rewards
+
+The goal is to have the free end reach a designated target height in as few steps as possible,
+and as such all steps that do not reach the goal incur a reward of -1.
+Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
+
+### Starting State
+
+Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized
+uniformly between -0.1 and 0.1. This means both links are pointing downwards with some initial stochasticity.
+
+### Episode End
+
+The episode ends if one of the following occurs:
+1. Termination: The free end reaches the target height, which is constructed as:
+`-cos(theta1) - cos(theta2 + theta1) > 1.0`
+2. Truncation: Episode length is greater than 500 (200 for v0)
+
+### Arguments
+
+No additional arguments are currently supported.
+
+```
+env = gymnasium.make('Acrobot-v1')
+```
+
+By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
+[Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
+However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
+in the original [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).
+
+```
+# To change the dynamics as described above
+env.env.book_or_nips = 'nips'
+```
+
+See the following note and
+the [implementation](https://github.com/Farama-Foundation/gymnasium/blob/master/gymnasium/envs/classic_control/acrobot.py) for details:
+
+> The dynamics equations were missing some terms in the NIPS paper which
+ are present in the book. R. Sutton confirmed in personal correspondence
+ that the experimental results shown in the paper and the book were
+ generated with the equations shown in the book.
+ However, there is the option to run the domain with the paper equations
+ by setting `book_or_nips = 'nips'`
+
+
+### Version History
+
+- v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
+`theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
+sine and cosine of each angle instead.
+- v0: Initial versions release (1.0.0) (removed from gymnasium for v1)
+
+### References
+- Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
+ In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
+ MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
+- Sutton, R. S., Barto, A. G. (2018). Reinforcement Learning: An Introduction. The MIT Press.
diff --git a/docs/environments/classic_control/cart_pole.md b/docs/environments/classic_control/cart_pole.md
new file mode 100644
index 000000000..3ce9d5107
--- /dev/null
+++ b/docs/environments/classic_control/cart_pole.md
@@ -0,0 +1,86 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Cart Pole
+---
+
+# Cart Pole
+
+```{figure} ../../_static/videos/classic_control/cart_pole.gif
+:width: 200px
+:name: cart_pole
+```
+
+This environment is part of the Classic Control environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(2) |
+| Observation Shape | (4,) |
+| Observation High | [4.8 inf 0.42 inf] |
+| Observation Low | [-4.8 -inf -0.42 -inf] |
+| Import | `gymnasium.make("CartPole-v1")` |
+
+
+### Description
+
+This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
+["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
+A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
+The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
+ in the left and right direction on the cart.
+
+### Action Space
+
+The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
+ of the fixed force the cart is pushed with.
+
+| Num | Action |
+|-----|------------------------|
+| 0 | Push cart to the left |
+| 1 | Push cart to the right |
+
+**Note**: The velocity that is reduced or increased by the applied force is not fixed; it depends on the angle
+ at which the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it.
+
+### Observation Space
+
+The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
+
+| Num | Observation | Min | Max |
+|-----|-----------------------|---------------------|-------------------|
+| 0 | Cart Position | -4.8 | 4.8 |
+| 1 | Cart Velocity | -Inf | Inf |
+| 2 | Pole Angle | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
+| 3 | Pole Angular Velocity | -Inf | Inf |
+
+**Note:** While the ranges above denote the possible values for observation space of each element,
+ it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
+- The cart x-position (index 0) can take values between `(-4.8, 4.8)`, but the episode terminates
+ if the cart leaves the `(-2.4, 2.4)` range.
+- The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the episode terminates
+ if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)
+
+### Rewards
+
+Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken,
+including the termination step, is allotted. The threshold for rewards is 475 for v1.
+
+### Starting State
+
+All observations are assigned a uniformly random value in `(-0.05, 0.05)`
+
+### Episode End
+
+The episode ends if any one of the following occurs:
+
+1. Termination: Pole Angle is greater than ±12°
+2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
+3. Truncation: Episode length is greater than 500 (200 for v0)
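+
+As an illustration, a minimal random-policy rollout that handles both the termination and truncation signals exposed by the standard Gymnasium API (sketch only):
+
+```python
+import gymnasium
+
+env = gymnasium.make("CartPole-v1")
+observation, info = env.reset(seed=42)
+done = False
+while not done:
+    action = env.action_space.sample()  # random policy
+    observation, reward, terminated, truncated, info = env.step(action)
+    done = terminated or truncated      # pole fell / cart left track, or 500 steps reached
+env.close()
+```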
+
+### Arguments
+
+```
+gymnasium.make('CartPole-v1')
+```
+
+No additional arguments are currently supported.
diff --git a/docs/environments/classic_control/index.html b/docs/environments/classic_control/index.html
new file mode 100644
index 000000000..c96e2ca2b
--- /dev/null
+++ b/docs/environments/classic_control/index.html
@@ -0,0 +1,65 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/classic_control/index.md b/docs/environments/classic_control/index.md
new file mode 100644
index 000000000..e705db21f
--- /dev/null
+++ b/docs/environments/classic_control/index.md
@@ -0,0 +1,32 @@
+---
+firstpage:
+lastpage:
+---
+
+## Classic Control
+
+```{toctree}
+:hidden:
+
+acrobot
+cart_pole
+mountain_car_continuous
+mountain_car
+pendulum
+```
+
+```{raw} html
+ :file: index.html
+```
+
+The unique dependencies for this set of environments can be installed via:
+
+````bash
+pip install gymnasium[classic_control]
+````
+
+There are five classic control environments: Acrobot, CartPole, Mountain Car, Continuous Mountain Car, and Pendulum. All of these environments are stochastic in terms of their initial state, within a given range. In addition, Acrobot has noise applied to the taken action. Also, regarding the both mountain car environments, the cars are under powered to climb the mountain, so it takes some effort to reach the top.
+
+Among Gymnasium environments, this set of environments can be considered as easier ones to solve by a policy.
+
+All environments are highly configurable via arguments specified in each environment's documentation.
diff --git a/docs/environments/classic_control/mountain_car.md b/docs/environments/classic_control/mountain_car.md
new file mode 100644
index 000000000..59fcc5903
--- /dev/null
+++ b/docs/environments/classic_control/mountain_car.md
@@ -0,0 +1,101 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Mountain Car
+---
+
+# Mountain Car
+
+```{figure} ../../_static/videos/classic_control/mountain_car.gif
+:width: 200px
+:name: mountain_car
+```
+
+This environment is part of the Classic Control environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(3) |
+| Observation Shape | (2,) |
+| Observation High | [0.6 0.07] |
+| Observation Low | [-1.2 -0.07] |
+| Import | `gymnasium.make("MountainCar-v0")` |
+
+
+### Description
+
+The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
+at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
+that can be applied to the car in either direction. The goal of the MDP is to strategically
+accelerate the car to reach the goal state on top of the right hill. There are two versions
+of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
+This version is the one with discrete actions.
+
+This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
+
+```
+@TECHREPORT{Moore90efficientmemory-based,
+ author = {Andrew William Moore},
+ title = {Efficient Memory-based Learning for Robot Control},
+ institution = {University of Cambridge},
+ year = {1990}
+}
+```
+
+### Observation Space
+
+The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Unit |
+|-----|--------------------------------------|------|-----|--------------|
+| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
+| 1   | velocity of the car                  | -Inf | Inf | velocity (m/s) |
+
+### Action Space
+
+There are 3 discrete deterministic actions:
+
+| Num | Action                  |
+|-----|-------------------------|
+| 0   | Accelerate to the left  |
+| 1   | Don't accelerate        |
+| 2   | Accelerate to the right |
+
+### Transition Dynamics:
+
+Given an action, the mountain car follows the following transition dynamics:
+
+*velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*
+
+*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
+
+where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the velocity set to 0
+upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and
+velocity is clipped to the range `[-0.07, 0.07]`.
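+
+Written out, the transition described above looks roughly as follows (an illustrative sketch of the stated equations, not the environment's actual source):
+
+```python
+import numpy as np
+
+FORCE, GRAVITY = 0.001, 0.0025
+
+def transition(position, velocity, action):
+    """One step of the Mountain Car dynamics described above (action in {0, 1, 2})."""
+    velocity += (action - 1) * FORCE - np.cos(3 * position) * GRAVITY
+    velocity = float(np.clip(velocity, -0.07, 0.07))
+    position += velocity
+    position = float(np.clip(position, -1.2, 0.6))
+    if position <= -1.2 and velocity < 0:  # inelastic collision with the left wall
+        velocity = 0.0
+    return position, velocity
+```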
+
+
+### Reward:
+
+The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is
+penalised with a reward of -1 for each timestep.
+
+### Starting State
+
+The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
+The starting velocity of the car is always assigned to 0.
+
+### Episode End
+
+The episode ends if either of the following happens:
+1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
+2. Truncation: The length of the episode is 200.
+
+
+### Arguments
+
+```
+gymnasium.make('MountainCar-v0')
+```
+
+### Version History
+
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/classic_control/mountain_car_continuous.md b/docs/environments/classic_control/mountain_car_continuous.md
new file mode 100644
index 000000000..6c6a0367a
--- /dev/null
+++ b/docs/environments/classic_control/mountain_car_continuous.md
@@ -0,0 +1,95 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Mountain Car Continuous
+---
+
+# Mountain Car Continuous
+
+```{figure} ../../_static/videos/classic_control/mountain_car_continuous.gif
+:width: 200px
+:name: mountain_car_continuous
+```
+
+This environment is part of the Classic Control environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (1,), float32) |
+| Observation Shape | (2,) |
+| Observation High | [0.6 0.07] |
+| Observation Low | [-1.2 -0.07] |
+| Import | `gymnasium.make("MountainCarContinuous-v0")` |
+
+
+### Description
+
+The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
+at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
+that can be applied to the car in either direction. The goal of the MDP is to strategically
+accelerate the car to reach the goal state on top of the right hill. There are two versions
+of the mountain car domain in gymnasium: one with discrete actions and one with continuous.
+This version is the one with continuous actions.
+
+This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
+
+```
+@TECHREPORT{Moore90efficientmemory-based,
+ author = {Andrew William Moore},
+ title = {Efficient Memory-based Learning for Robot Control},
+ institution = {University of Cambridge},
+ year = {1990}
+}
+```
+
+### Observation Space
+
+The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Unit |
+|-----|--------------------------------------|------|-----|--------------|
+| 0 | position of the car along the x-axis | -Inf | Inf | position (m) |
+| 1   | velocity of the car                  | -Inf | Inf | velocity (m/s) |
+
+### Action Space
+
+The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car.
+The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
+
+### Transition Dynamics:
+
+Given an action, the mountain car follows the following transition dynamics:
+
+*velocity<sub>t+1</sub> = velocity<sub>t</sub> + force * power - 0.0025 * cos(3 * position<sub>t</sub>)*
+
+*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
+
+where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
+The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall.
+The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
+
+### Reward
+
+A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
+taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100
+is added to the negative reward for that timestep.
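+
+Written out, the per-step reward described above looks roughly like this (an illustrative sketch, not the environment's source):
+
+```python
+def step_reward(action, reached_goal):
+    """Reward described above: quadratic action penalty plus a +100 goal bonus."""
+    reward = -0.1 * float(action[0]) ** 2
+    if reached_goal:
+        reward += 100.0
+    return reward
+```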
+
+### Starting State
+
+The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
+The starting velocity of the car is always assigned to 0.
+
+### Episode End
+
+The episode ends if either of the following happens:
+1. Termination: The position of the car is greater than or equal to 0.45 (the goal position on top of the right hill)
+2. Truncation: The length of the episode is 999.
+
+### Arguments
+
+```
+gymnasium.make('MountainCarContinuous-v0')
+```
+
+### Version History
+
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/classic_control/pendulum.md b/docs/environments/classic_control/pendulum.md
new file mode 100644
index 000000000..5dd57777f
--- /dev/null
+++ b/docs/environments/classic_control/pendulum.md
@@ -0,0 +1,92 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Pendulum
+lastpage:
+---
+
+# Pendulum
+
+```{figure} ../../_static/videos/classic_control/pendulum.gif
+:width: 200px
+:name: pendulum
+```
+
+This environment is part of the Classic Control environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-2.0, 2.0, (1,), float32) |
+| Observation Shape | (3,) |
+| Observation High | [1. 1. 8.] |
+| Observation Low | [-1. -1. -8.] |
+| Import | `gymnasium.make("Pendulum-v1")` |
+
+
+### Description
+
+The inverted pendulum swingup problem is based on the classic problem in control theory.
+The system consists of a pendulum attached at one end to a fixed point, and the other end being free.
+The pendulum starts in a random position and the goal is to apply torque on the free end to swing it
+into an upright position, with its center of gravity right above the fixed point.
+
+The diagram below specifies the coordinate system used for the implementation of the pendulum's
+dynamic equations.
+
+![Pendulum Coordinate System](./diagrams/pendulum.png)
+
+- `x-y`: cartesian coordinates of the pendulum's end in meters.
+- `theta` : angle in radians.
+- `tau`: torque in `N m`. Defined as positive _counter-clockwise_.
+
+### Action Space
+
+The action is a `ndarray` with shape `(1,)` representing the torque applied to the free end of the pendulum.
+
+| Num | Action | Min | Max |
+|-----|--------|------|-----|
+| 0 | Torque | -2.0 | 2.0 |
+
+
+### Observation Space
+
+The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
+end and its angular velocity.
+
+| Num | Observation | Min | Max |
+|-----|------------------|------|-----|
+| 0 | x = cos(theta) | -1.0 | 1.0 |
+| 1   | y = sin(theta)   | -1.0 | 1.0 |
+| 2 | Angular Velocity | -8.0 | 8.0 |
+
+### Rewards
+
+The reward function is defined as:
+
+*r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*
+
+where `theta` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
+Based on the above equation, the minimum reward that can be obtained is
+*-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*,
+while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied).
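+
+The reward above can be transcribed directly (the angle-normalisation helper is illustrative, not part of the environment's API):
+
+```python
+import numpy as np
+
+def angle_normalize(theta):
+    """Wrap an angle into [-pi, pi]."""
+    return ((theta + np.pi) % (2 * np.pi)) - np.pi
+
+def pendulum_reward(theta, theta_dot, torque):
+    return -(angle_normalize(theta) ** 2 + 0.1 * theta_dot ** 2 + 0.001 * torque ** 2)
+```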
+
+### Starting State
+
+The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.
+
+### Episode Truncation
+
+The episode truncates at 200 time steps.
+
+### Arguments
+
+- `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics.
+  The default value is g = 10.0.
+
+```
+gymnasium.make('Pendulum-v1', g=9.81)
+```
+
+### Version History
+
+* v1: Simplify the math equations, no difference in behavior.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/ant.md b/docs/environments/mujoco/ant.md
new file mode 100644
index 000000000..257ca7733
--- /dev/null
+++ b/docs/environments/mujoco/ant.md
@@ -0,0 +1,183 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Ant
+firstpage:
+---
+
+# Ant
+
+```{figure} ../../_static/videos/mujoco/ant.gif
+:width: 200px
+:name: ant
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (8,), float32) |
+| Observation Shape | (27,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Ant-v4")` |
+
+
+### Description
+
+This environment is based on the environment introduced by Schulman,
+Moritz, Levine, Jordan and Abbeel in ["High-Dimensional Continuous Control
+Using Generalized Advantage Estimation"](https://arxiv.org/abs/1506.02438).
+The ant is a 3D robot consisting of one torso (free rotational body) with
+four legs attached to it with each leg having two links. The goal is to
+coordinate the four legs to move in the forward (right) direction by applying
+torques on the eight hinges connecting the two links of each leg and the torso
+(nine parts and eight hinges).
+
+### Action Space
+The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+| 0 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) |
+| 1 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) |
+| 2 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) |
+| 3 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) |
+| 4 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) |
+| 5 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) |
+| 6 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) |
+| 7 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of positional values of different body parts of the ant,
+followed by the velocities of those individual parts (their derivatives) with all
+the positions ordered before all the velocities.
+
+By default, observations do not include the x- and y-coordinates of the ant's torso. These may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+In that case, the observation space will have 113 dimensions where the first two dimensions
+represent the x- and y- coordinates of the ant's torso.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+of the torso will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
+
+However, by default, an observation is a `ndarray` with shape `(111,)`
+where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------|
+| 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) |
+| 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
+| 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
+| 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
+| 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) |
+| 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
+| 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
+| 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
+| 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
+| 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
+| 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
+| 11 | angle between torso and first link on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
+| 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
+| 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
+| 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
+| 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) |
+| 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
+| 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
+| 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) |
+| 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) |
+| 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) |
+| 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) |
+| 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) |
+| 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) |
+| 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) |
+| 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) |
+| 26 |angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) |
+
+
+The remaining 14*6 = 84 elements of the observation are contact forces
+(external forces - force x, y, z and torque x, y, z) applied to the
+center of mass of each of the links. The 14 links are: the ground link,
+the torso link, and 3 links for each leg (1 + 1 + 12) with the 6 external forces.
+
+The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+DOFs expressed as quaternions. One can read more about free joints on the [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
+
+**Note:** The Ant-v4 environment no longer has the following contact forces issue.
+For previous Ant versions (before v4), there have been reported issues that using a Mujoco-Py version > 2.0 results
+in the contact forces always being 0. As such we recommend using a Mujoco-Py version < 2.0
+with those older Ant environments if you would like to report results with contact forces (if
+contact forces are not used in your experiments, you can use version > 2.0).
+
+### Rewards
+The reward consists of four parts:
+- *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`
+- *forward_reward*: A reward of moving forward which is measured as
+*(x-coordinate after action - x-coordinate before action)/dt*. *dt* is the time
+between actions and is dependent on the `frame_skip` parameter (default is 5),
+where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*.
+This reward would be positive if the ant moves forward (in positive x direction).
+- *ctrl_cost*: A negative reward for penalising the ant if it takes actions
+that are too large. It is measured as *`ctrl_cost_weight` * sum(action<sup>2</sup>)*
+where *`ctrl_cost_weight`* is a parameter set for the control and has a default value of 0.5.
+- *contact_cost*: A negative reward for penalising the ant if the external contact
+forces are too large. It is calculated as *`contact_cost_weight` * sum(clip(external contact
+force to `contact_force_range`)<sup>2</sup>)*.
+
+The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms.
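+
+Putting the four parts together, a rough sketch of this reward computation using the default weights listed under Arguments (illustrative only, not the environment's source):
+
+```python
+import numpy as np
+
+DT = 0.05                      # frame_skip (5) * frametime (0.01)
+CTRL_COST_WEIGHT = 0.5
+CONTACT_COST_WEIGHT = 5e-4
+HEALTHY_REWARD = 1.0
+
+def ant_reward(x_before, x_after, action, contact_forces, is_healthy):
+    forward_reward = (x_after - x_before) / DT
+    ctrl_cost = CTRL_COST_WEIGHT * np.sum(np.square(action))
+    contact_cost = CONTACT_COST_WEIGHT * np.sum(
+        np.square(np.clip(contact_forces, -1.0, 1.0)))  # default contact_force_range
+    healthy_reward = HEALTHY_REWARD if is_healthy else 0.0
+    return healthy_reward + forward_reward - ctrl_cost - contact_cost
+```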
+
+### Starting State
+All observations start in state
+(0.0, 0.0, 0.75, 1.0, 0.0 ... 0.0) with a uniform noise in the range
+of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and standard normal noise
+with mean 0 and standard deviation `reset_noise_scale` added to the velocity values for
+stochasticity. Note that the initial z coordinate is intentionally selected
+to be slightly high, thereby indicating a standing up ant. The initial orientation
+is designed to make it face forward as well.
+
+### Episode End
+The ant is said to be unhealthy if any of the following happens:
+
+1. Any of the state space values is no longer finite
+2. The z-coordinate of the torso is **not** in the closed interval given by `healthy_z_range` (defaults to [0.2, 1.0])
+
+If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+the episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps
+2. Termination: The ant is unhealthy
+
+If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+env = gymnasium.make('Ant-v2')
+```
+
+v3 and v4 take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('Ant-v4', ctrl_cost_weight=0.1, ...)
+```
+
+| Parameter | Type | Default |Description |
+|-------------------------|------------|--------------|-------------------------------|
+| `xml_file` | **str** | `"ant.xml"` | Path to a MuJoCo model |
+| `ctrl_cost_weight` | **float** | `0.5` | Weight for *ctrl_cost* term (see section on reward) |
+| `contact_cost_weight` | **float** | `5e-4` | Weight for *contact_cost* term (see section on reward) |
+| `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep |
+| `terminate_when_unhealthy` | **bool**| `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` |
+| `healthy_z_range` | **tuple** | `(0.2, 1)` | The ant is considered healthy if the z-coordinate of the torso is in this range |
+| `contact_force_range` | **tuple** | `(-1, 1)` | Contact forces are clipped to this range in the computation of *contact_cost* |
+| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+### Version History
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/half_cheetah.md b/docs/environments/mujoco/half_cheetah.md
new file mode 100644
index 000000000..1f499acfd
--- /dev/null
+++ b/docs/environments/mujoco/half_cheetah.md
@@ -0,0 +1,140 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Half Cheetah
+---
+
+# Half Cheetah
+
+```{figure} ../../_static/videos/mujoco/half_cheetah.gif
+:width: 200px
+:name: half_cheetah
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (6,), float32) |
+| Observation Shape | (17,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("HalfCheetah-v4")` |
+
+
+### Description
+
+This environment is based on the work by P. Wawrzyński in
+["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf).
+The HalfCheetah is a 2-dimensional robot consisting of 9 links and 8
+joints connecting them (including two paws). The goal is to apply a torque
+on the joints to make the cheetah run forward (right) as fast as possible,
+with a positive reward allocated based on the distance moved forward and a
+negative reward allocated for moving backward. The torso and head of the
+cheetah are fixed, and the torque can only be applied on the other 6 joints
+over the front and back thighs (connecting to the torso), shins
+(connecting to the thighs) and feet (connecting to the shins).
+
+### Action Space
+The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied between *links*.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+| 0 | Torque applied on the back thigh rotor | -1 | 1 | bthigh | hinge | torque (N m) |
+| 1 | Torque applied on the back shin rotor | -1 | 1 | bshin | hinge | torque (N m) |
+| 2 | Torque applied on the back foot rotor | -1 | 1 | bfoot | hinge | torque (N m) |
+| 3 | Torque applied on the front thigh rotor | -1 | 1 | fthigh | hinge | torque (N m) |
+| 4 | Torque applied on the front shin rotor | -1 | 1 | fshin | hinge | torque (N m) |
+| 5 | Torque applied on the front foot rotor | -1 | 1 | ffoot | hinge | torque (N m) |
+
+
+### Observation Space
+
+Observations consist of positional values of different body parts of the
+cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
+
+By default, observations do not include the x-coordinate of the cheetah's center of mass. It may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+In that case, the observation space will have 18 dimensions where the first dimension
+represents the x-coordinate of the cheetah's center of mass.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
+will be returned in `info` with key `"x_position"`.
+
+However, by default, the observation is a `ndarray` with shape `(17,)` where the elements correspond to the following:
+
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0   | z-coordinate of the front tip                  | -Inf | Inf | rootz                            | slide | position (m)             |
+| 1   | angle of the front tip                         | -Inf | Inf | rooty                            | hinge | angle (rad)              |
+| 2   | angle of the back thigh                        | -Inf | Inf | bthigh                           | hinge | angle (rad)              |
+| 3   | angle of the back shin                         | -Inf | Inf | bshin                            | hinge | angle (rad)              |
+| 4   | angle of the back foot                         | -Inf | Inf | bfoot                            | hinge | angle (rad)              |
+| 5   | angle of the front thigh                       | -Inf | Inf | fthigh                           | hinge | angle (rad)              |
+| 6   | angle of the front shin                        | -Inf | Inf | fshin                            | hinge | angle (rad)              |
+| 7   | angle of the front foot                        | -Inf | Inf | ffoot                            | hinge | angle (rad)              |
+| 8   | velocity of the x-coordinate of the front tip  | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
+| 9   | velocity of the z-coordinate of the front tip  | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
+| 10  | angular velocity of the front tip              | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
+| 11  | angular velocity of the back thigh             | -Inf | Inf | bthigh                           | hinge | angular velocity (rad/s) |
+| 12  | angular velocity of the back shin              | -Inf | Inf | bshin                            | hinge | angular velocity (rad/s) |
+| 13  | angular velocity of the back foot              | -Inf | Inf | bfoot                            | hinge | angular velocity (rad/s) |
+| 14  | angular velocity of the front thigh            | -Inf | Inf | fthigh                           | hinge | angular velocity (rad/s) |
+| 15  | angular velocity of the front shin             | -Inf | Inf | fshin                            | hinge | angular velocity (rad/s) |
+| 16  | angular velocity of the front foot             | -Inf | Inf | ffoot                            | hinge | angular velocity (rad/s) |
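+
+A short check of the default observation shape and the `"x_position"` info entry described above (illustrative usage, assuming the MuJoCo dependencies are installed):
+
+```python
+import gymnasium
+
+env = gymnasium.make("HalfCheetah-v4")   # x-coordinate excluded from observations by default
+observation, info = env.reset(seed=0)
+observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
+print(observation.shape)      # (17,)
+print(info["x_position"])     # the excluded x-coordinate is still reported here
+```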
+
+### Rewards
+The reward consists of two parts:
+- *forward_reward*: A reward of moving forward which is measured
+as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+the time between actions and is dependent on the frame_skip parameter
+(fixed to 5), where the frametime is 0.01 - making the
+default *dt = 5 * 0.01 = 0.05*. This reward would be positive if the cheetah
+runs forward (right).
+- *ctrl_cost*: A cost for penalising the cheetah if it takes
+actions that are too large. It is measured as *`ctrl_cost_weight` *
+sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
+control and has a default value of 0.1
+
+The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
+
+### Starting State
+All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the
+initial state for stochasticity. As seen before, the first 8 values in the
+state are positional and the last 9 values are velocity. A uniform noise in
+the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the positional values while a standard
+normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the
+initial velocity values of all zeros.
+
+### Episode End
+The episode truncates when the episode length is greater than 1000.
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+env = gymnasium.make('HalfCheetah-v2')
+```
+
+v3 and v4 take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('HalfCheetah-v4', ctrl_cost_weight=0.1, ...)
+```
+
+| Parameter | Type | Default | Description |
+| -------------------------------------------- | --------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `xml_file` | **str** | `"half_cheetah.xml"` | Path to a MuJoCo model |
+| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
+| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ weight (see section on reward) |
+| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/hopper.md b/docs/environments/mujoco/hopper.md
new file mode 100644
index 000000000..76adf7080
--- /dev/null
+++ b/docs/environments/mujoco/hopper.md
@@ -0,0 +1,145 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Hopper
+---
+
+# Hopper
+
+```{figure} ../../_static/videos/mujoco/hopper.gif
+:width: 200px
+:name: hopper
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (3,), float32) |
+| Observation Shape | (11,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Hopper-v4")` |
+
+
+### Description
+
+This environment is based on the work done by Erez, Tassa, and Todorov in
+["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to
+increase the number of independent state and control variables as compared to
+the classic control environments. The hopper is a two-dimensional
+one-legged figure that consist of four main body parts - the torso at the
+top, the thigh in the middle, the leg in the bottom, and a single foot on
+which the entire body rests. The goal is to make hops that move in the
+forward (right) direction by applying torques on the three hinges
+connecting the four body parts.
+
+### Action Space
+The action space is a `Box(-1, 1, (3,), float32)`. An action represents the torques applied between *links*
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+| 0 | Torque applied on the thigh rotor | -1 | 1 | thigh_joint | hinge | torque (N m) |
+| 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) |
+| 2   | Torque applied on the foot rotor   | -1          | 1           | foot_joint                       | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of positional values of different body parts of the
+hopper, followed by the velocities of those individual parts
+(their derivatives) with all the positions ordered before all the velocities.
+
+By default, observations do not include the x-coordinate of the hopper. It may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+In that case, the observation space will have 12 dimensions where the first dimension
+represents the x-coordinate of the hopper.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
+will be returned in `info` with key `"x_position"`.
+
+However, by default, the observation is a `ndarray` with shape `(11,)` where the elements
+correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ------------------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0 | z-coordinate of the top (height of hopper) | -Inf | Inf | rootz | slide | position (m) |
+| 1 | angle of the top | -Inf | Inf | rooty | hinge | angle (rad) |
+| 2 | angle of the thigh joint | -Inf | Inf | thigh_joint | hinge | angle (rad) |
+| 3 | angle of the leg joint | -Inf | Inf | leg_joint | hinge | angle (rad) |
+| 4 | angle of the foot joint | -Inf | Inf | foot_joint | hinge | angle (rad) |
+| 5 | velocity of the x-coordinate of the top | -Inf | Inf | rootx | slide | velocity (m/s) |
+| 6 | velocity of the z-coordinate (height) of the top | -Inf | Inf | rootz | slide | velocity (m/s) |
+| 7 | angular velocity of the angle of the top | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
+| 8 | angular velocity of the thigh hinge | -Inf | Inf | thigh_joint | hinge | angular velocity (rad/s) |
+| 9 | angular velocity of the leg hinge | -Inf | Inf | leg_joint | hinge | angular velocity (rad/s) |
+| 10 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) |
+
+
+### Rewards
+The reward consists of three parts:
+- *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`.
+- *forward_reward*: A reward of hopping forward which is measured
+as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+the time between actions and is dependent on the frame_skip parameter
+(fixed to 4), where the frametime is 0.002 - making the
+default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the hopper
+hops forward (positive x direction).
+- *ctrl_cost*: A cost for penalising the hopper if it takes
+actions that are too large. It is measured as *`ctrl_cost_weight` *
+sum(action<sup>2</sup>)* where *`ctrl_cost_weight`* is a parameter set for the
+control and has a default value of 0.001
+
+The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms
+
+### Starting State
+All observations start in state
+(0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise
+ in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
+
+### Episode End
+The hopper is said to be unhealthy if any of the following happens:
+
+1. An element of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer contained in the closed interval specified by the argument `healthy_state_range`
+2. The height of the hopper (`observation[0]` if `exclude_current_positions_from_observation=True`, else `observation[1]`) is no longer contained in the closed interval specified by the argument `healthy_z_range` (usually meaning that it has fallen)
+3. The angle (`observation[1]` if `exclude_current_positions_from_observation=True`, else `observation[2]`) is no longer contained in the closed interval specified by the argument `healthy_angle_range`
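+
+A sketch of this health check for the default observation layout, using the default argument values from the table under Arguments below (illustrative only, not the environment's source):
+
+```python
+import numpy as np
+
+HEALTHY_STATE_RANGE = (-100.0, 100.0)
+HEALTHY_Z_RANGE = (0.7, float("inf"))
+HEALTHY_ANGLE_RANGE = (-0.2, 0.2)
+
+def is_healthy(observation):
+    """Health check assuming `exclude_current_positions_from_observation=True`."""
+    z, angle, rest = observation[0], observation[1], observation[1:]
+    state_ok = np.all((HEALTHY_STATE_RANGE[0] <= rest) & (rest <= HEALTHY_STATE_RANGE[1]))
+    z_ok = HEALTHY_Z_RANGE[0] <= z <= HEALTHY_Z_RANGE[1]
+    angle_ok = HEALTHY_ANGLE_RANGE[0] <= angle <= HEALTHY_ANGLE_RANGE[1]
+    return bool(state_ok and z_ok and angle_ok)
+```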
+
+If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+the episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps
+2. Termination: The hopper is unhealthy
+
+If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+env = gymnasium.make('Hopper-v2')
+```
+
+v3 and v4 take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('Hopper-v4', ctrl_cost_weight=0.1, ...)
+```
+
+| Parameter | Type | Default | Description |
+| -------------------------------------------- | --------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `xml_file` | **str** | `"hopper.xml"` | Path to a MuJoCo model |
+| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
+| `ctrl_cost_weight` | **float** | `0.001` | Weight for _ctrl_cost_ reward (see section on reward) |
+| `healthy_reward`                              | **float** | `1`                   | Constant reward given if the hopper is "healthy" after timestep |
+| `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the hopper is no longer healthy |
+| `healthy_state_range` | **tuple** | `(-100, 100)` | The elements of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) must be in this range for the hopper to be considered healthy |
+| `healthy_z_range` | **tuple** | `(0.7, float("inf"))` | The z-coordinate must be in this range for the hopper to be considered healthy |
+| `healthy_angle_range` | **tuple** | `(-0.2, 0.2)` | The angle given by `observation[1]` (if `exclude_current_positions_from_observation=True`, else `observation[2]`) must be in this range for the hopper to be considered healthy |
+| `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/humanoid.md b/docs/environments/mujoco/humanoid.md
new file mode 100644
index 000000000..78665fe09
--- /dev/null
+++ b/docs/environments/mujoco/humanoid.md
@@ -0,0 +1,213 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Humanoid
+---
+
+# Humanoid
+
+```{figure} ../../_static/videos/mujoco/humanoid.gif
+:width: 200px
+:name: humanoid
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-0.4, 0.4, (17,), float32) |
+| Observation Shape | (376,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Humanoid-v4")` |
+
+
+### Description
+
+This environment is based on the environment introduced by Tassa, Erez and Todorov
+in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
+The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a pair of
+legs and arms. The legs each consist of two links, and so the arms (representing the knees and
+elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over.
+
+### Action Space
+The action space is a `Box(-0.4, 0.4, (17,), float32)`. An action represents the torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|----------------------|---------------|----------------|---------------------------------------|-------|------|
+| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | abdomen_y | hinge | torque (N m) |
+| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | abdomen_z | hinge | torque (N m) |
+| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | abdomen_x | hinge | torque (N m) |
+| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
+| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
+| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
+| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
+| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
+| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
+| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
+| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
+| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
+| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
+| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
+| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
+| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
+| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of positional values of different body parts of the Humanoid,
+ followed by the velocities of those individual parts (their derivatives) with all the
+ positions ordered before all the velocities.
+
+By default, observations do not include the x- and y-coordinates of the torso. These may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+In that case, the observation space will have 378 dimensions where the first two dimensions
+represent the x- and y-coordinates of the torso.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
+
+However, by default, the observation is a `ndarray` with shape `(376,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
+| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
+| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
+| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
+| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
+| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
+| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
+| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
+| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
+| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
+| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
+| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
+| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
+| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
+| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
+| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
+| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
+| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
+| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
+| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angular velocity (rad/s) |
+| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angular velocity (rad/s) |
+| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angular velocity (rad/s) |
+| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angular velocity (rad/s) |
+| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angular velocity (rad/s) |
+| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angular velocity (rad/s) |
+| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angular velocity (rad/s) |
+| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angular velocity (rad/s) |
+| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angular velocity (rad/s) |
+| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angular velocity (rad/s) |
+| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angular velocity (rad/s) |
+| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angular velocity (rad/s) |
+| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angular velocity (rad/s) |
+| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angular velocity (rad/s) |
+| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angular velocity (rad/s) |
+| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angular velocity (rad/s) |
+| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angular velocity (rad/s) |
+
+Additionally, after all the positional and velocity based values in the table,
+the observation contains (in order):
+- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
+(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
+and hence adds another 140 elements to the state space.
+- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
+adds another 84 elements to the state space.
+- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
+`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
+- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
+14 * 6 (*nbody * 6*) and hence adds another 84 elements to the state space.
+
+Here, *nbody* stands for the number of bodies in the robot and *nv* stands for the
+number of degrees of freedom (*= dim(qvel)*).
+
+The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+DOFs expressed as quaternions. One can read more about free joints on the
+[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
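+A quick way to confirm the observation sizes described above is to compare the two
+construction options (a sketch; the shapes follow directly from this section):
+
+```
+import gymnasium
+
+# Default: the torso x- and y-coordinates are excluded -> 376-dimensional observations
+env = gymnasium.make("Humanoid-v4")
+print(env.observation_space.shape)  # (376,)
+
+# Include the torso x- and y-coordinates -> 378-dimensional observations
+env = gymnasium.make("Humanoid-v4", exclude_current_positions_from_observation=False)
+print(env.observation_space.shape)  # (378,)
+```
+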
+**Note:** The Humanoid-v4 environment no longer has the following contact-forces issue.
+In Humanoid versions prior to v4, there have been reported issues that using a Mujoco-Py version > 2.0
+results in the contact forces always being 0. As such we recommend using a Mujoco-Py
+version < 2.0 with those earlier versions if you would like to report results
+with contact forces (if contact forces are not used in your experiments, you can use
+version > 2.0).
+
+### Rewards
+The reward consists of four parts:
+- *healthy_reward*: Every timestep that the humanoid is alive (see section Episode End for definition), it gets a reward of fixed value `healthy_reward`.
+- *forward_reward*: A reward for walking forward, measured as *`forward_reward_weight` *
+(average center of mass after action - average center of mass before action)/dt*.
+*dt* is the time between actions and depends on the frame_skip parameter
+(default is 5), where the frametime is 0.003 - making the default *dt = 5 * 0.003 = 0.015*.
+This reward is positive when the humanoid walks forward (in the positive x-direction). The calculation
+for the center of mass is defined in the `.py` file for the Humanoid.
+- *ctrl_cost*: A negative reward for penalising the humanoid if it has too
+large of a control force. If there are *nu* actuators/controls, then the control has
+shape `nu x 1`. It is measured as *`ctrl_cost_weight` * sum(control²)*.
+- *contact_cost*: A negative reward for penalising the humanoid if the external
+contact force is too large. It is calculated by clipping
+*`contact_cost_weight` * sum(external contact force²)* to the interval specified by `contact_cost_range`.
+
+The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost*, and `info` will also contain the individual reward terms.
+
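+Since the individual terms are reported alongside the total, they can be inspected per
+step; a minimal sketch (the exact key names in `info` depend on the environment version,
+so the example simply prints the whole dictionary):
+
+```
+import gymnasium
+
+env = gymnasium.make("Humanoid-v4")
+obs, info = env.reset(seed=0)
+action = env.action_space.sample()  # random torques, stand-in for a policy
+obs, reward, terminated, truncated, info = env.step(action)
+print(reward)  # total reward for this step
+print(info)    # individual reward terms plus the torso x/y position
+```
+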
+### Starting State
+All observations start in state
+(0.0, 0.0, 1.4, 1.0, 0.0 ... 0.0) with a uniform noise in the range
+of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table)
+for stochasticity. Note that the initial z coordinate is intentionally
+selected to be high, thereby indicating a standing up humanoid. The initial
+orientation is designed to make it face forward as well.
+
+### Episode End
+The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the
+closed interval specified by the argument `healthy_z_range`.
+
+If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+the episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps
+2. Termination: The humanoid is unhealthy
+
+If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+env = gymnasium.make('Humanoid-v4')
+```
+
+v3 and v4 take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('Humanoid-v4', ctrl_cost_weight=0.1, ....)
+```
+
+| Parameter | Type | Default | Description |
+| -------------------------------------------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `xml_file` | **str** | `"humanoid.xml"` | Path to a MuJoCo model |
+| `forward_reward_weight` | **float** | `1.25` | Weight for _forward_reward_ term (see section on reward) |
+| `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ term (see section on reward) |
+| `contact_cost_weight` | **float** | `5e-7` | Weight for _contact_cost_ term (see section on reward) |
+| `healthy_reward` | **float** | `5.0` | Constant reward given if the humanoid is "healthy" after timestep |
+| `terminate_when_unhealthy` | **bool** | `True` | If `True`, terminate the episode when the z-coordinate of the torso is no longer in the `healthy_z_range` |
+| `healthy_z_range` | **tuple** | `(1.0, 2.0)` | The humanoid is considered healthy if the z-coordinate of the torso is in this range |
+| `reset_noise_scale` | **float** | `1e-2` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
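+For example, the termination behaviour and reward shaping can be adjusted at construction
+time; a sketch using parameters from the table above (the specific values are arbitrary):
+
+```
+import gymnasium
+
+env = gymnasium.make(
+    "Humanoid-v4",
+    forward_reward_weight=2.0,       # emphasise walking speed
+    healthy_z_range=(0.8, 2.1),      # relax the standing constraint
+    terminate_when_unhealthy=False,  # never terminate early, only truncate at 1000 steps
+)
+```
+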
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/humanoid_standup.md b/docs/environments/mujoco/humanoid_standup.md
new file mode 100644
index 000000000..a2c0a97c5
--- /dev/null
+++ b/docs/environments/mujoco/humanoid_standup.md
@@ -0,0 +1,192 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Humanoid Standup
+---
+
+# Humanoid Standup
+
+```{figure} ../../_static/videos/mujoco/humanoid_standup.gif
+:width: 200px
+:name: humanoid_standup
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-0.4, 0.4, (17,), float32) |
+| Observation Shape | (376,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("HumanoidStandup-v4")` |
+
+
+### Description
+
+This environment is based on the environment introduced by Tassa, Erez and Todorov
+in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025).
+The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a
+pair of legs and arms. The legs each consist of two links, and so do the arms (representing the
+knees and elbows respectively). The environment starts with the humanoid lying on the ground,
+and then the goal of the environment is to make the humanoid standup and then keep it standing
+by applying torques on the various hinges.
+
+### Action Space
+The agent takes a 17-element vector for actions.
+
+The action space is a continuous `(action, ...)` all in `[-0.4, 0.4]`, where `action`
+represents the numerical torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ |
+| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | hip_1 (front_left_leg) | hinge | torque (N m) |
+| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | angle_1 (front_left_leg) | hinge | torque (N m) |
+| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | hip_2 (front_right_leg) | hinge | torque (N m) |
+| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) |
+| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) |
+| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) |
+| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) |
+| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) |
+| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) |
+| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) |
+| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) |
+| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) |
+| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) |
+| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) |
+| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) |
+| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) |
+| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) |
+
+### Observation Space
+
+The state space consists of positional values of different body parts of the Humanoid,
+followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
+
+**Note:** The x- and y-coordinates of the torso are being omitted to produce position-agnostic behavior in policies
+
+The observation is a `ndarray` with shape `(376,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- |
+| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) |
+| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) |
+| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) |
+| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) |
+| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) |
+| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) |
+| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) |
+| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) |
+| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) |
+| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) |
+| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) |
+| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) |
+| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) |
+| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) |
+| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) |
+| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) |
+| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) |
+| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) |
+| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) |
+| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) |
+| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | angular velocity (rad/s) |
+| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angular velocity (rad/s) |
+| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angular velocity (rad/s) |
+| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angular velocity (rad/s) |
+| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angular velocity (rad/s) |
+| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angular velocity (rad/s) |
+| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angular velocity (rad/s) |
+| 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angular velocity (rad/s) |
+| 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angular velocity (rad/s) |
+| 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angular velocity (rad/s) |
+| 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angular velocity (rad/s) |
+| 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angular velocity (rad/s) |
+| 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angular velocity (rad/s) |
+| 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angular velocity (rad/s) |
+| 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angular velocity (rad/s) |
+| 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angular velocity (rad/s) |
+| 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angular velocity (rad/s) |
+| 44 | angular velocity of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angular velocity (rad/s) |
+
+
+Additionally, after all the positional and velocity based values in the table,
+the state space consists of (in order):
+- *cinert:* Mass and inertia of a single rigid body relative to the center of mass
+(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*)
+and hence adds another 140 elements to the state space.
+- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence
+adds another 84 elements to the state space.
+- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape
+`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space.
+- *cfrc_ext:* This is the center of mass based external force on the body. It has shape
+14 * 6 (*nbody * 6*) and hence adds another 84 elements to the state space.
+
+Here, *nbody* stands for the number of bodies in the robot and *nv* stands for the number
+of degrees of freedom (*= dim(qvel)*).
+
+The (x,y,z) coordinates are translational DOFs while the orientations are rotational
+DOFs expressed as quaternions. One can read more about free joints on the
+[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).
+
+**Note:** The HumanoidStandup-v4 environment no longer has the following contact-forces issue.
+In HumanoidStandup versions prior to v4, there have been reported issues that using a Mujoco-Py version > 2.0 results
+in the contact forces always being 0. As such we recommend using a Mujoco-Py version < 2.0
+with those earlier versions if you would like to report results with contact forces
+(if contact forces are not used in your experiments, you can use version > 2.0).
+
+### Rewards
+The reward consists of three parts:
+- *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative
+reward measuring how far upward the Humanoid has moved since the last timestep, but an
+absolute reward measuring how far upward it has moved overall. It is
+measured as *(z coordinate after action - 0)/(atomic timestep)*, where the *z coordinate after
+action* is index 0 in the observation (index 2 in the underlying *qpos*), and the *atomic timestep* is the time for
+one frame of movement even though the simulation runs with a frame_skip of 5 (done in order to inflate
+rewards a little for faster learning).
+- *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large of
+a control force. If there are *nu* actuators/controls, then the control has shape `nu x 1`.
+It is measured as *0.1 * sum(control²)*.
+- *quad_impact_cost*: A negative reward for penalising the humanoid if the external
+contact force is too large. It is calculated as *min(0.5 * 0.000001 * sum(external
+contact force²), 10)*.
+
+The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost*.
+
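+Because the first observation element is the z-coordinate of the torso, the standing-up
+progress can be tracked directly during a rollout; a minimal sketch with random actions:
+
+```
+import gymnasium
+
+env = gymnasium.make("HumanoidStandup-v4")
+obs, info = env.reset(seed=0)
+for step in range(100):
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    if step % 20 == 0:
+        print(f"step {step:3d}: torso height = {obs[0]:.3f} m, reward = {reward:.2f}")
+    if terminated or truncated:
+        break
+env.close()
+```
+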
+### Starting State
+All observations start in state
+(0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of
+[-0.01, 0.01] added to the positional and velocity values (values in the table)
+for stochasticity. Note that the initial z coordinate is intentionally selected
+to be low, thereby indicating a humanoid lying on the ground. The initial orientation is
+designed to make it face forward as well.
+
+### Episode End
+The episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps
+2. Termination: Any of the state space values is no longer finite
+
+### Arguments
+
+No additional arguments are currently supported.
+
+```
+env = gymnasium.make('HumanoidStandup-v4')
+```
+
+There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and
+beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/index.html b/docs/environments/mujoco/index.html
new file mode 100644
index 000000000..bab47bd06
--- /dev/null
+++ b/docs/environments/mujoco/index.html
@@ -0,0 +1,125 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/mujoco/index.md b/docs/environments/mujoco/index.md
new file mode 100644
index 000000000..ff2b3247a
--- /dev/null
+++ b/docs/environments/mujoco/index.md
@@ -0,0 +1,41 @@
+---
+firstpage:
+lastpage:
+---
+
+## MuJoCo
+
+```{toctree}
+:hidden:
+
+ant
+half_cheetah
+hopper
+humanoid_standup
+humanoid
+inverted_double_pendulum
+inverted_pendulum
+reacher
+swimmer
+walker2d
+```
+
+```{raw} html
+ :file: index.html
+```
+
+MuJoCo stands for Multi-Joint dynamics with Contact. It is a physics engine for facilitating research and development in robotics, biomechanics, graphics and animation, and other areas where fast and accurate simulation is needed.
+
+The unique dependencies for this set of environments can be installed via:
+
+````bash
+pip install gymnasium[mujoco]
+````
+
+These environments also require that the MuJoCo engine be installed. In October 2021 DeepMind acquired MuJoCo and open-sourced it in 2022, making it free for everyone. Instructions on installing the MuJoCo engine can be found at their [website](https://mujoco.org) and [GitHub repository](https://github.com/deepmind/mujoco). Using MuJoCo with Gymnasium also requires that the framework `mujoco-py` be installed, which can be found at the [GitHub repository](https://github.com/openai/mujoco-py/tree/master/mujoco_py) (this dependency is installed with the above command).
+
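+As a quick check that the installation works, any of the environments below can be created
+and stepped with random actions (a minimal sketch; `render_mode="human"` opens an interactive viewer when a display is available):
+
+````python
+import gymnasium
+
+env = gymnasium.make("HalfCheetah-v4", render_mode="human")
+observation, info = env.reset(seed=42)
+for _ in range(1000):
+    action = env.action_space.sample()  # random actions, stand-in for a trained policy
+    observation, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        observation, info = env.reset()
+env.close()
+````
+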
+There are ten Mujoco environments: Ant, HalfCheetah, Hopper, Humanoid, HumanoidStandup, InvertedDoublePendulum, InvertedPendulum, Reacher, Swimmer, and Walker. All of these environments are stochastic in terms of their initial state, with a Gaussian noise added to a fixed initial state in order to add stochasticity. The state spaces for MuJoCo environments in Gymnasium consist of two parts that are flattened and concatenated together: a position of a body part ('*mujoco-py.mjsim.qpos*') or joint and its corresponding velocity ('*mujoco-py.mjsim.qvel*'). Often, some of the first positional elements are omitted from the state space since the reward is calculated based on their values, leaving it up to the algorithm to infer those hidden values indirectly.
+
+Among Gymnasium environments, this set of environments can be considered among the more difficult to solve with a policy.
+
+Environments can be configured by changing the XML files or by tweaking the parameters of their classes.
\ No newline at end of file
diff --git a/docs/environments/mujoco/inverted_double_pendulum.md b/docs/environments/mujoco/inverted_double_pendulum.md
new file mode 100644
index 000000000..6682dc77d
--- /dev/null
+++ b/docs/environments/mujoco/inverted_double_pendulum.md
@@ -0,0 +1,126 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Inverted Double Pendulum
+---
+
+# Inverted Double Pendulum
+
+```{figure} ../../_static/videos/mujoco/inverted_double_pendulum.gif
+:width: 200px
+:name: inverted_double_pendulum
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (1,), float32) |
+| Observation Shape | (11,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("InvertedDoublePendulum-v4")` |
+
+
+### Description
+
+This environment originates from control theory and builds on the cartpole
+environment based on the work done by Barto, Sutton, and Anderson in
+["Neuronlike adaptive elements that can solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
+powered by the Mujoco physics simulator - allowing for more complex experiments
+(such as varying the effects of gravity or constraints). This environment involves a cart that can
+be moved linearly, with a pole fixed on it and a second pole fixed on the other end of the first one
+(leaving the second pole as the only one with one free end). The cart can be pushed left or right,
+and the goal is to balance the second pole on top of the first pole, which is in turn on top of the
+cart, by applying continuous forces on the cart.
+
+### Action Space
+The agent takes a 1-element vector for actions.
+The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the
+numerical force applied to the cart (with magnitude representing the amount of force and
+sign representing the direction)
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
+| 0 | Force applied on the cart | -1 | 1 | slider | slide | Force (N) |
+
+### Observation Space
+
+The state space consists of positional values of different body parts of the pendulum system,
+followed by the velocities of those individual parts (their derivatives) with all the
+positions ordered before all the velocities.
+
+The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ----------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
+| 1 | sine of the angle between the cart and the first pole | -Inf | Inf | sin(hinge) | hinge | unitless |
+| 2 | sine of the angle between the two poles | -Inf | Inf | sin(hinge2) | hinge | unitless |
+| 3 | cosine of the angle between the cart and the first pole | -Inf | Inf | cos(hinge) | hinge | unitless |
+| 4 | cosine of the angle between the two poles | -Inf | Inf | cos(hinge2) | hinge | unitless |
+| 5 | velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
+| 6 | angular velocity of the angle between the cart and the first pole | -Inf | Inf | hinge | hinge | angular velocity (rad/s) |
+| 7 | angular velocity of the angle between the two poles | -Inf | Inf | hinge2 | hinge | angular velocity (rad/s) |
+| 8 | constraint force - 1 | -Inf | Inf | | | Force (N) |
+| 9 | constraint force - 2 | -Inf | Inf | | | Force (N) |
+| 10 | constraint force - 3 | -Inf | Inf | | | Force (N) |
+
+
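+Since the pole angles enter the observation only through their sines and cosines
+(indices 1-4), the underlying angles can be recovered with `arctan2` if needed; a sketch:
+
+```
+import gymnasium
+import numpy as np
+
+env = gymnasium.make("InvertedDoublePendulum-v4")
+obs, info = env.reset(seed=0)
+# obs[1]/obs[3]: sin/cos of the cart-pole angle, obs[2]/obs[4]: sin/cos of the pole-pole angle
+angle_pole1 = np.arctan2(obs[1], obs[3])
+angle_pole2 = np.arctan2(obs[2], obs[4])
+print(angle_pole1, angle_pole2)
+```
+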
+There is physical contact between the robots and their environment - and Mujoco
+attempts to get realistic physics simulations for the possible physical contact
+dynamics by aiming for physical accuracy and computational efficiency.
+
+There is one constraint force for contacts for each degree of freedom (3).
+The approach and handling of constraints by Mujoco is unique to the simulator
+and is based on their research. One can find more information in their
+[*documentation*](https://mujoco.readthedocs.io/en/latest/computation.html)
+or in their paper
+["Analytically-invertible dynamics with contacts and constraints: Theory and implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf).
+
+
+### Rewards
+
+The reward consists of three parts:
+- *alive_bonus*: The goal is to make the second inverted pendulum stand upright
+(within a certain angle limit) as long as possible - as such a reward of +10 is awarded
+for each timestep that the second pole is upright.
+- *distance_penalty*: This penalty is a measure of how far the *tip* of the second pendulum
+(the only free end) moves, and it is calculated as
+*0.01 * x² + (y - 2)²*, where *x* is the x-coordinate of the tip
+and *y* is the y-coordinate of the tip of the second pole.
+- *velocity_penalty*: A negative reward for penalising the agent if it moves too
+fast, calculated as *0.001 * v1² + 0.005 * v2²*.
+
+The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty*
+
+### Starting State
+All observations start in state
+(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
+of [-0.1, 0.1] added to the positional values (cart position and pole angles) and standard
+normal noise with a standard deviation of 0.1 added to the velocity values for stochasticity.
+
+### Episode End
+The episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps.
+2. Termination: Any of the state space values is no longer finite.
+3. Termination: The y-coordinate of the tip of the second pole *is less than or equal* to 1. (The maximum standing height of the system is 1.196 m when all the parts are perpendicularly vertical on top of each other.)
+
+### Arguments
+
+No additional arguments are currently supported.
+
+```
+env = gymnasium.make('InvertedDoublePendulum-v4')
+```
+There is no v3 for InvertedDoublePendulum, unlike the robot environments where a v3 and
+beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/inverted_pendulum.md b/docs/environments/mujoco/inverted_pendulum.md
new file mode 100644
index 000000000..71d101ae6
--- /dev/null
+++ b/docs/environments/mujoco/inverted_pendulum.md
@@ -0,0 +1,97 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Inverted Pendulum
+---
+
+# Inverted Pendulum
+
+```{figure} ../../_static/videos/mujoco/inverted_pendulum.gif
+:width: 200px
+:name: inverted_pendulum
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-3.0, 3.0, (1,), float32) |
+| Observation Shape | (4,) |
+| Observation High | [inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf] |
+| Import | `gymnasium.make("InvertedPendulum-v4")` |
+
+
+### Description
+
+This environment is the cartpole environment based on the work done by
+Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can
+solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077),
+just like in the classic environments but now powered by the Mujoco physics simulator -
+allowing for more complex experiments (such as varying the effects of gravity).
+This environment involves a cart that can be moved linearly, with a pole fixed on it
+at one end and the other end free. The cart can be pushed left or right, and the
+goal is to balance the pole on the top of the cart by applying forces on the cart.
+
+### Action Space
+The agent takes a 1-element vector for actions.
+
+The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents
+the numerical force applied to the cart (with magnitude representing the amount of
+force and sign representing the direction)
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------|
+| 0 | Force applied on the cart | -3 | 3 | slider | slide | Force (N) |
+
+### Observation Space
+
+The state space consists of positional values of different body parts of
+the pendulum system, followed by the velocities of those individual parts (their derivatives)
+with all the positions ordered before all the velocities.
+
+The observation is a `ndarray` with shape `(4,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | --------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------- |
+| 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) |
+| 1 | vertical angle of the pole on the cart | -Inf | Inf | hinge | hinge | angle (rad) |
+| 2 | linear velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) |
+| 3   | angular velocity of the pole on the cart      | -Inf | Inf | hinge                            | hinge | angular velocity (rad/s)  |
+
+
+### Rewards
+
+The goal is to make the inverted pendulum stand upright (within a certain angle limit)
+as long as possible - as such a reward of +1 is awarded for each timestep that
+the pole is upright.
+
+### Starting State
+All observations start in state
+(0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
+of [-0.01, 0.01] added to the values for stochasticity.
+
+### Episode End
+The episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps.
+2. Termination: Any of the state space values is no longer finite.
+3. Termination: The absolute value of the vertical angle between the pole and the cart is greater than 0.2 radians.
+
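+Under random actions the angle-based termination is usually what ends the episode; a
+minimal sketch that steps until either termination or truncation occurs:
+
+```
+import gymnasium
+
+env = gymnasium.make("InvertedPendulum-v4")
+obs, info = env.reset(seed=0)
+terminated = truncated = False
+steps = 0
+while not (terminated or truncated):
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    steps += 1
+print(f"episode ended after {steps} steps, pole angle = {obs[1]:.3f} rad")
+env.close()
+```
+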
+### Arguments
+
+No additional arguments are currently supported.
+
+```
+env = gymnasium.make('InvertedPendulum-v4')
+```
+There is no v3 for InvertedPendulum, unlike the robot environments where a
+v3 and beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/pusher.md b/docs/environments/mujoco/pusher.md
new file mode 100644
index 000000000..e45ef6d10
--- /dev/null
+++ b/docs/environments/mujoco/pusher.md
@@ -0,0 +1,142 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Pusher
+---
+
+# Pusher
+
+```{figure} ../../_static/videos/mujoco/pusher.gif
+:width: 200px
+:name: pusher
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-2.0, 2.0, (7,), float32) |
+| Observation Shape | (23,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Pusher-v4")` |
+
+
+### Description
+"Pusher" is a multi-jointed robot arm which is very similar to that of a human.
+ The goal is to move a target cylinder (called *object*) to a goal position using the robot's end effector (called *fingertip*).
+ The robot consists of shoulder, elbow, forearm, and wrist joints.
+
+### Action Space
+The action space is a `Box(-2, 2, (7,), float32)`. An action represents the torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|--------------------------------------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+| 0   | Rotation of the shoulder panning joint                               | -2          | 2           | r_shoulder_pan_joint             | hinge | torque (N m) |
+| 1 | Rotation of the shoulder lifting joint | -2 | 2 | r_shoulder_lift_joint | hinge | torque (N m) |
+| 2 | Rotation of the shoulder rolling joint | -2 | 2 | r_upper_arm_roll_joint | hinge | torque (N m) |
+| 3   | Rotation of the hinge joint that flexes the elbow                    | -2          | 2           | r_elbow_flex_joint               | hinge | torque (N m) |
+| 4 | Rotation of hinge that rolls the forearm | -2 | 2 | r_forearm_roll_joint | hinge | torque (N m) |
+| 5 | Rotation of flexing the wrist | -2 | 2 | r_wrist_flex_joint | hinge | torque (N m) |
+| 6 | Rotation of rolling the wrist | -2 | 2 | r_wrist_roll_joint | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of
+
+- Angle of rotational joints on the pusher
+- Angular velocities of rotational joints on the pusher
+- The coordinates of the fingertip of the pusher
+- The coordinates of the object to be moved
+- The coordinates of the goal position
+
+The observation is a `ndarray` with shape `(23,)` where the elements correspond to the table below.
+An analogy can be drawn to a human arm in order to help understand the state space, with the words flex and roll having the
+same meaning as they do for human joints.
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | -------------------------------------------------------- | ---- | --- | -------------------------------- | -------- | ------------------------ |
+| 0   | Rotation of the shoulder panning joint                     | -Inf | Inf | r_shoulder_pan_joint             | hinge    | angle (rad)              |
+| 1 | Rotation of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angle (rad) |
+| 2 | Rotation of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angle (rad) |
+| 3   | Rotation of the hinge joint that flexes the elbow          | -Inf | Inf | r_elbow_flex_joint               | hinge    | angle (rad)              |
+| 4 | Rotation of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angle (rad) |
+| 5 | Rotation of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | angle (rad) |
+| 6 | Rotation of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angle (rad) |
+| 7   | Rotational velocity of the shoulder panning joint          | -Inf | Inf | r_shoulder_pan_joint             | hinge    | angular velocity (rad/s) |
+| 8 | Rotational velocity of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angular velocity (rad/s) |
+| 9 | Rotational velocity of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angular velocity (rad/s) |
+| 10  | Rotational velocity of the hinge joint that flexes the elbow | -Inf | Inf | r_elbow_flex_joint             | hinge    | angular velocity (rad/s) |
+| 11 | Rotational velocity of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angular velocity (rad/s) |
+| 12 | Rotational velocity of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | angular velocity (rad/s) |
+| 13 | Rotational velocity of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angular velocity (rad/s) |
+| 14 | x-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
+| 15 | y-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
+| 16 | z-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) |
+| 17 | x-coordinate of the object to be moved | -Inf | Inf | object (obj_slidex) | slide | position (m) |
+| 18 | y-coordinate of the object to be moved | -Inf | Inf | object (obj_slidey) | slide | position (m) |
+| 19 | z-coordinate of the object to be moved | -Inf | Inf | object | cylinder | position (m) |
+| 20 | x-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidex) | slide | position (m) |
+| 21 | y-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidey) | slide | position (m) |
+| 22 | z-coordinate of the goal position of the object | -Inf | Inf | goal | sphere | position (m) |
+
+
+### Rewards
+The reward consists of three parts:
+- *reward_near*: This reward is a measure of how far the *fingertip*
+of the pusher (the unattached end) is from the object, with a more negative
+value assigned for when the pusher's *fingertip* is further away from the
+object. It is calculated as the negative vector norm of (position of
+the fingertip - position of object), or *-norm("fingertip" - "object")*.
+- *reward_dist*: This reward is a measure of how far the object is from
+the target goal position, with a more negative value assigned when the object is
+further away from the target. It is calculated as the negative vector norm of
+(position of the object - position of goal), or *-norm("object" - "target")*.
+- *reward_ctrl*: A negative reward for penalising the pusher if
+it takes actions that are too large. It is measured as the negative squared
+Euclidean norm of the action, i.e. as *- sum(action²)*.
+
+The total reward returned is ***reward*** *=* *reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near*
+
+Unlike other environments, Pusher does not allow you to specify weights for the individual reward terms.
+However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
+you should create a wrapper that computes the weighted reward from `info`.
+
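+A minimal wrapper of this kind might look like the sketch below (the class name and
+weights are illustrative, not part of Gymnasium):
+
+```
+import gymnasium
+
+class WeightedPusherReward(gymnasium.Wrapper):
+    """Recombine the reward terms reported in `info` with custom weights."""
+
+    def __init__(self, env, dist_weight=1.0, ctrl_weight=0.1):
+        super().__init__(env)
+        self.dist_weight = dist_weight
+        self.ctrl_weight = ctrl_weight
+
+    def step(self, action):
+        obs, reward, terminated, truncated, info = self.env.step(action)
+        reward = (self.dist_weight * info["reward_dist"]
+                  + self.ctrl_weight * info["reward_ctrl"])
+        return obs, reward, terminated, truncated, info
+
+env = WeightedPusherReward(gymnasium.make("Pusher-v4"), dist_weight=2.0)
+```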
+
+### Starting State
+All pusher (not including object and goal) states start in
+(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0). A uniform noise in the range
+[-0.005, 0.005] is added to the velocity attributes only. The velocities of
+the object and goal are permanently set to 0. The object's x-position is selected uniformly
+between [-0.3, 0] while the y-position is selected uniformly between [-0.2, 0.2], and this
+process is repeated until the vector norm between the object's (x,y) position and origin is not greater
+than 0.17. The goal always has the same position of (0.45, -0.05, -0.323).
+
+The default frame_skip is 5, with each frame lasting 0.01 s, giving rise to *dt = 5 * 0.01 = 0.05*
+
+### Episode End
+
+The episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 100 timesteps.
+2. Termination: Any of the state space values is no longer finite.
+
+### Arguments
+
+No additional arguments are currently supported (in v2 and lower),
+but modifications can be made to the XML file in the assets folder
+(or by changing the path to a modified XML file in another folder).
+
+```
+env = gymnasium.make('Pusher-v4')
+```
+
+There is no v3 for Pusher, unlike the robot environments where a v3 and
+beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/reacher.md b/docs/environments/mujoco/reacher.md
new file mode 100644
index 000000000..a23f90220
--- /dev/null
+++ b/docs/environments/mujoco/reacher.md
@@ -0,0 +1,132 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Reacher
+---
+
+# Reacher
+
+```{figure} ../../_static/videos/mujoco/reacher.gif
+:width: 200px
+:name: reacher
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (2,), float32) |
+| Observation Shape | (11,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Reacher-v4")` |
+
+
+### Description
+"Reacher" is a two-jointed robot arm. The goal is to move the robot's end effector (called *fingertip*) close to a
+target that is spawned at a random position.
+
+### Action Space
+The action space is a `Box(-1, 1, (2,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|---------------------------------------------------------------------------------|-------------|-------------|--------------------------|-------|------|
+| 0 | Torque applied at the first hinge (connecting the link to the point of fixture) | -1 | 1 | joint0 | hinge | torque (N m) |
+| 1 | Torque applied at the second hinge (connecting the two links) | -1 | 1 | joint1 | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of
+
+- The cosine of the angles of the two arms
+- The sine of the angles of the two arms
+- The coordinates of the target
+- The angular velocities of the arms
+- The vector between the target and the reacher's fingertip (3 dimensional with the last element being 0)
+
+The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ---------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0 | cosine of the angle of the first arm | -Inf | Inf | cos(joint0) | hinge | unitless |
+| 1 | cosine of the angle of the second arm | -Inf | Inf | cos(joint1) | hinge | unitless |
+| 2   | sine of the angle of the first arm                                                               | -Inf | Inf | sin(joint0)                      | hinge | unitless                 |
+| 3   | sine of the angle of the second arm                                                              | -Inf | Inf | sin(joint1)                      | hinge | unitless                 |
+| 4 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) |
+| 5 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) |
+| 6 | angular velocity of the first arm | -Inf | Inf | joint0 | hinge | angular velocity (rad/s) |
+| 7 | angular velocity of the second arm | -Inf | Inf | joint1 | hinge | angular velocity (rad/s) |
+| 8 | x-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) |
+| 9 | y-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) |
+| 10 | z-value of position_fingertip - position_target (0 since reacher is 2d and z is same for both) | -Inf | Inf | NA | slide | position (m) |
+
+
+Most Gymnasium environments just return the positions and velocities of the
+joints in the `.xml` file as the state of the environment. In Reacher, however,
+the state is created by combining only certain elements of the
+position and velocity and performing some function transformations on them.
+Reading the `.xml` file for Reacher reveals 4 joints:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|-----------------------------|----------|----------|----------------------------------|-------|--------------------|
+| 0 | angle of the first arm | -Inf | Inf | joint0 | hinge | angle (rad) |
+| 1 | angle of the second arm | -Inf | Inf | joint1 | hinge | angle (rad) |
+| 2 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) |
+| 3 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) |
+
+
+### Rewards
+The reward consists of two parts:
+- *reward_distance*: This reward is a measure of how far the *fingertip*
+of the reacher (the unattached end) is from the target, with a more negative
+value assigned for when the reacher's *fingertip* is further away from the
+target. It is calculated as the negative vector norm of (position of
+the fingertip - position of target), or *-norm("fingertip" - "target")*.
+- *reward_control*: A negative reward for penalising the reacher if
+it takes actions that are too large. It is measured as the negative squared
+Euclidean norm of the action, i.e. as *-sum(action²)*.
+
+The total reward returned is ***reward*** *=* *reward_distance + reward_control*
+
+Unlike other environments, Reacher does not allow you to specify weights for the individual reward terms.
+However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms,
+you should create a wrapper that computes the weighted reward from `info`.
+
+
+### Starting State
+All observations start in state
+(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+with a noise added for stochasticity. A uniform noise in the range
+[-0.1, 0.1] is added to the positional attributes, while the target position
+is selected uniformly at random in a disk of radius 0.2 around the origin.
+Independent, uniform noise in the
+range of [-0.005, 0.005] is added to the velocities, and the last
+element ("fingertip" - "target") is calculated at the end once everything
+is set. The default setting has a frame skip of 2, with each frame lasting 0.01 seconds, giving a *dt = 2 * 0.01 = 0.02*.
+
+### Episode End
+
+The episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 50 timesteps (with a new random target popping up if the reacher's fingertip reaches it before 50 timesteps)
+2. Termination: Any of the state space values is no longer finite.
+
+### Arguments
+
+No additional arguments are currently supported (in v2 and lower),
+but modifications can be made to the XML file in the assets folder
+(or by changing the path to a modified XML file in another folder).
+
+```
+env = gymnasium.make('Reacher-v4')
+```
+
+There is no v3 for Reacher, unlike the robot environments where v3 and
+beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale, etc.
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/swimmer.md b/docs/environments/mujoco/swimmer.md
new file mode 100644
index 000000000..73f320f02
--- /dev/null
+++ b/docs/environments/mujoco/swimmer.md
@@ -0,0 +1,134 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Swimmer
+---
+
+# Swimmer
+
+```{figure} ../../_static/videos/mujoco/swimmer.gif
+:width: 200px
+:name: swimmer
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (2,), float32) |
+| Observation Shape | (8,) |
+| Observation High | [inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Swimmer-v4")` |
+
+
+### Description
+
+This environment corresponds to the Swimmer environment described in Rémi Coulom's PhD thesis
+["Reinforcement Learning Using Neural Networks, with Applications to Motor Control"](https://tel.archives-ouvertes.fr/tel-00003985/document).
+The environment aims to increase the number of independent state and control
+variables as compared to the classic control environments. The swimmers
+consist of three or more segments ('***links***') connected by one fewer
+articulation joints ('***rotors***') - each rotor joint connects exactly two links to
+form a linear chain. The swimmer is suspended in a two-dimensional pool and
+always starts in the same position (subject to some deviation drawn from a
+uniform distribution), and the goal is to move as fast as possible towards
+the right by applying torque on the rotors and using the fluid's friction.
+
+### Notes
+
+The problem parameters are:
+* *n*: number of body parts
+* *m_i*: mass of part *i* (*i* ∈ {1...n})
+* *l_i*: length of part *i* (*i* ∈ {1...n})
+* *k*: viscous-friction coefficient
+
+The default environment has *n* = 3, *l_i* = 0.1,
+and *k* = 0.1. It is possible to pass a custom MuJoCo XML file during construction to increase the
+number of links, or to tweak any of the parameters.
+
+### Action Space
+The action space is a `Box(-1, 1, (2,), float32)`. An action represents the torques applied between *links*.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+| 0 | Torque applied on the first rotor | -1 | 1 | rot2 | hinge | torque (N m) |
+| 1 | Torque applied on the second rotor | -1 | 1 | rot3 | hinge | torque (N m) |
+
+### Observation Space
+
+By default, observations consist of:
+* *θ_i*: angle of part *i* with respect to the *x* axis
+* *θ_i'*: its derivative with respect to time (angular velocity)
+
+In the default case, observations do not include the x- and y-coordinates of the front tip. These may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+Then, the observation space will have 10 dimensions where the first two dimensions
+represent the x- and y-coordinates of the front tip.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates
+will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively.
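+
+For example, a short illustrative check of the behaviour described above:
+
+```
+import gymnasium
+
+env = gymnasium.make("Swimmer-v4", exclude_current_positions_from_observation=False)
+print(env.observation_space.shape)  # (10,): x and y of the front tip are prepended
+
+obs, info = env.reset()
+obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+print(info["x_position"], info["y_position"])  # positions are always reported in `info`
+```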
+
+By default, the observation is a `ndarray` with shape `(8,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0 | angle of the front tip | -Inf | Inf | rot | hinge | angle (rad) |
+| 1 | angle of the first rotor | -Inf | Inf | rot2 | hinge | angle (rad) |
+| 2 | angle of the second rotor | -Inf | Inf | rot3 | hinge | angle (rad) |
+| 3 | velocity of the tip along the x-axis | -Inf | Inf | slider1 | slide | velocity (m/s) |
+| 4 | velocity of the tip along the y-axis | -Inf | Inf | slider2 | slide | velocity (m/s) |
+| 5 | angular velocity of front tip | -Inf | Inf | rot | hinge | angular velocity (rad/s) |
+| 6 | angular velocity of first rotor | -Inf | Inf | rot2 | hinge | angular velocity (rad/s) |
+| 7 | angular velocity of second rotor | -Inf | Inf | rot3 | hinge | angular velocity (rad/s) |
+
+### Rewards
+The reward consists of two parts:
+- *forward_reward*: A reward of moving forward which is measured
+as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
+the time between actions and is dependent on the frame_skip parameter
+(default is 4), where the frametime is 0.01 - making the
+default *dt = 4 * 0.01 = 0.04*. This reward would be positive if the swimmer
+swims right as desired.
+- *ctrl_cost*: A cost for penalising the swimmer if it takes
+actions that are too large. It is measured as *`ctrl_cost_weight` *
+sum(action²)* where *`ctrl_cost_weight`* is a parameter set for the
+control and has a default value of 1e-4.
+
+The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost*, and `info` will also contain the individual reward terms.
+
+### Starting State
+All observations start in state (0,0,0,0,0,0,0,0), with uniform noise in the range [-`reset_noise_scale`, `reset_noise_scale`] added to the initial state for stochasticity.
+
+### Episode End
+The episode is truncated when the episode length exceeds 1000 timesteps.
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+gymnasium.make('Swimmer-v4')
+```
+
+v3 and v4 take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('Swimmer-v4', ctrl_cost_weight=0.1, ....)
+```
+
+| Parameter | Type | Default | Description |
+| -------------------------------------------- | --------- | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `xml_file` | **str** | `"swimmer.xml"` | Path to a MuJoCo model |
+| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
+| `ctrl_cost_weight` | **float** | `1e-4` | Weight for _ctrl_cost_ term (see section on reward) |
+| `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/mujoco/walker2d.md b/docs/environments/mujoco/walker2d.md
new file mode 100644
index 000000000..3bd96a114
--- /dev/null
+++ b/docs/environments/mujoco/walker2d.md
@@ -0,0 +1,151 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Walker2D
+lastpage:
+---
+
+# Walker2D
+
+```{figure} ../../_static/videos/mujoco/walker2d.gif
+:width: 200px
+:name: walker2d
+```
+
+This environment is part of the Mujoco environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Box(-1.0, 1.0, (6,), float32) |
+| Observation Shape | (17,) |
+| Observation High | [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf] |
+| Observation Low | [-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf] |
+| Import | `gymnasium.make("Walker2d-v4")` |
+
+
+### Description
+
+This environment builds on the hopper environment based on the work done by Erez, Tassa, and Todorov
+in ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf)
+by adding another set of legs, making it possible for the robot to walk forward instead of
+hop. Like other Mujoco environments, this environment aims to increase the number of independent state
+and control variables as compared to the classic control environments. The walker is a
+two-dimensional two-legged figure that consists of four main body parts - a single torso at the top
+(with the two legs splitting after the torso), two thighs in the middle below the torso, two legs
+at the bottom below the thighs, and two feet attached to the legs on which the entire body rests.
+The goal is to coordinate both sets of feet, legs, and thighs to move in the forward (right)
+direction by applying torques on the six hinges connecting the six body parts.
+
+### Action Space
+The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.
+
+| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit |
+|-----|----------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
+| 0 | Torque applied on the thigh rotor | -1 | 1 | thigh_joint | hinge | torque (N m) |
+| 1 | Torque applied on the leg rotor | -1 | 1 | leg_joint | hinge | torque (N m) |
+| 2 | Torque applied on the foot rotor | -1 | 1 | foot_joint | hinge | torque (N m) |
+| 3 | Torque applied on the left thigh rotor | -1 | 1 | thigh_left_joint | hinge | torque (N m) |
+| 4 | Torque applied on the left leg rotor | -1 | 1 | leg_left_joint | hinge | torque (N m) |
+| 5 | Torque applied on the left foot rotor | -1 | 1 | foot_left_joint | hinge | torque (N m) |
+
+### Observation Space
+
+Observations consist of positional values of different body parts of the walker,
+followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.
+
+By default, observations do not include the x-coordinate of the top. It may
+be included by passing `exclude_current_positions_from_observation=False` during construction.
+In that case, the observation space will have 18 dimensions, where the first dimension
+represents the x-coordinate of the top of the walker.
+Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
+of the top will be returned in `info` with key `"x_position"`.
+
+By default, observation is a `ndarray` with shape `(17,)` where the elements correspond to the following:
+
+| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit |
+| --- | ------------------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
+| 0   | z-coordinate of the top (height of the walker)    | -Inf | Inf | rootz (torso)                    | slide | position (m)             |
+| 1 | angle of the top | -Inf | Inf | rooty (torso) | hinge | angle (rad) |
+| 2 | angle of the thigh joint | -Inf | Inf | thigh_joint | hinge | angle (rad) |
+| 3 | angle of the leg joint | -Inf | Inf | leg_joint | hinge | angle (rad) |
+| 4 | angle of the foot joint | -Inf | Inf | foot_joint | hinge | angle (rad) |
+| 5 | angle of the left thigh joint | -Inf | Inf | thigh_left_joint | hinge | angle (rad) |
+| 6 | angle of the left leg joint | -Inf | Inf | leg_left_joint | hinge | angle (rad) |
+| 7 | angle of the left foot joint | -Inf | Inf | foot_left_joint | hinge | angle (rad) |
+| 8 | velocity of the x-coordinate of the top | -Inf | Inf | rootx | slide | velocity (m/s) |
+| 9 | velocity of the z-coordinate (height) of the top | -Inf | Inf | rootz | slide | velocity (m/s) |
+| 10 | angular velocity of the angle of the top | -Inf | Inf | rooty | hinge | angular velocity (rad/s) |
+| 11 | angular velocity of the thigh hinge | -Inf | Inf | thigh_joint | hinge | angular velocity (rad/s) |
+| 12 | angular velocity of the leg hinge | -Inf | Inf | leg_joint | hinge | angular velocity (rad/s) |
+| 13 | angular velocity of the foot hinge | -Inf | Inf | foot_joint | hinge | angular velocity (rad/s) |
+| 14  | angular velocity of the left thigh hinge          | -Inf | Inf | thigh_left_joint                 | hinge | angular velocity (rad/s) |
+| 15  | angular velocity of the left leg hinge            | -Inf | Inf | leg_left_joint                   | hinge | angular velocity (rad/s) |
+| 16  | angular velocity of the left foot hinge           | -Inf | Inf | foot_left_joint                  | hinge | angular velocity (rad/s) |
+
+### Rewards
+The reward consists of three parts:
+- *healthy_reward*: Every timestep that the walker is alive, it receives a fixed reward of value `healthy_reward`,
+- *forward_reward*: A reward of walking forward which is measured as
+*`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*.
+*dt* is the time between actions and is dependent on the frame_skip parameter
+(default is 4), where the frametime is 0.002 - making the default
+*dt = 4 * 0.002 = 0.008*. This reward would be positive if the walker walks forward (right) as desired.
+- *ctrl_cost*: A cost for penalising the walker if it
+takes actions that are too large. It is measured as
+*`ctrl_cost_weight` * sum(action²)* where *`ctrl_cost_weight`* is
+a parameter set for the control and has a default value of 0.001.
+
+The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost*, and `info` will also contain the individual reward terms.
+
+### Starting State
+All observations start in state
+(0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.
+
+### Episode End
+The walker is said to be unhealthy if any of the following happens:
+
+1. Any of the state space values is no longer finite
+2. The height of the walker is ***not*** in the closed interval specified by `healthy_z_range`
+3. The absolute value of the angle (`observation[1]` if `exclude_current_positions_from_observation=True`, else `observation[2]`) is ***not*** in the closed interval specified by `healthy_angle_range`
+
+If `terminate_when_unhealthy=True` is passed during construction (which is the default),
+the episode ends when any of the following happens:
+
+1. Truncation: The episode duration reaches 1000 timesteps
+2. Termination: The walker is unhealthy
+
+If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
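+
+An illustrative sketch of that behaviour (the random-action loop is only for demonstration):
+
+```
+import gymnasium
+
+# With terminate_when_unhealthy=False the walker never terminates early,
+# so an episode should only end through the 1000-step truncation.
+env = gymnasium.make("Walker2d-v4", terminate_when_unhealthy=False)
+obs, info = env.reset(seed=0)
+steps = 0
+terminated = truncated = False
+while not (terminated or truncated):
+    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
+    steps += 1
+print(steps, terminated, truncated)  # expected: 1000 False True
+```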
+
+### Arguments
+
+No additional arguments are currently supported in v2 and lower.
+
+```
+env = gymnasium.make('Walker2d-v4')
+```
+
+v3 and beyond take gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
+
+```
+env = gymnasium.make('Walker2d-v4', ctrl_cost_weight=0.1, ....)
+```
+
+| Parameter | Type | Default | Description |
+| -------------------------------------------- | --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `xml_file` | **str** | `"walker2d.xml"` | Path to a MuJoCo model |
+| `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) |
+| `ctrl_cost_weight`                            | **float** | `1e-3`           | Weight for _ctrl_cost_ term (see section on reward)                                                                                                                 |
+| `healthy_reward`                              | **float** | `1.0`            | Constant reward given if the walker is "healthy" after timestep                                                                                                     |
+| `terminate_when_unhealthy`                    | **bool**  | `True`           | If true, issue a done signal if the walker is no longer healthy                                                                                                     |
+| `healthy_z_range` | **tuple** | `(0.8, 2)` | The z-coordinate of the top of the walker must be in this range to be considered healthy |
+| `healthy_angle_range` | **tuple** | `(-1, 1)` | The angle must be in this range to be considered healthy |
+| `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) |
+| `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |
+
+
+### Version History
+
+* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
+* v3: support for gymnasium.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
+* v2: All continuous control environments now use mujoco_py >= 1.50
+* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/third_party_environments/index.md b/docs/environments/third_party_environments/index.md
new file mode 100644
index 000000000..93e6eacdf
--- /dev/null
+++ b/docs/environments/third_party_environments/index.md
@@ -0,0 +1,391 @@
+# Third Party Environments
+
+## Video Game Environments
+
+### [ViZDoom](https://github.com/mwydmuch/ViZDoom)
+
+An environment centered around the original [Doom](https://en.wikipedia.org/wiki/Doom_(1993_video_game)) game, focusing on visual control (from image to actions) at thousands of frames per second. ViZDoom supports depth and automatic annotation/labels buffers, as well as accessing the sound. The Gym wrappers provide easy-to-use access to the example scenarios that come with ViZDoom. Since 2016, the [ViZDoom paper](https://arxiv.org/abs/1605.02097) has been cited more than 600 times.
+
+### [ flappy-bird-gym: A Flappy Bird environment for OpenAI Gym](https://github.com/Talendar/flappy-bird-gym)
+
+A simple environment for single-agent reinforcement learning algorithms on a clone of [Flappy Bird](https://en.wikipedia.org/wiki/Flappy_Bird), the hugely popular arcade-style mobile game. Both state and pixel observation environments are available.
+
+### [ gym-derk: GPU accelerated MOBA environment](https://gymnasium.derkgame.com)
+
+This is a 3v3 MOBA environment where you train creatures to fight each other. It runs entirely on the GPU, so you can easily have hundreds of instances running in parallel. There are around 15 items for the creatures, 60 "senses", 5 actions, and roughly 23 tweakable rewards. It's also possible to benchmark an agent against other agents online. It's available for free for training for personal use, and otherwise costs money; see licensing details on the website.
+
+### [ MineRL](https://github.com/minerllabs/minerl)
+
+Gym interface with Minecraft game, focused on a specific sparse reward challenge
+
+### [ Procgen](https://github.com/openai/procgen)
+
+16 simple-to-use procedurally-generated gym environments which provide a direct measure of how quickly a reinforcement learning agent learns generalizable skills. The environments run at high speed (thousands of steps per second) on a single core.
+
+### [ SlimeVolleyGym: A simple environment for single and multi-agent reinforcement learning](https://github.com/hardmaru/slimevolleygym)
+
+A simple environment for benchmarking single and multi-agent reinforcement learning algorithms on a clone of Slime Volleyball game. Only dependencies are gym and numpy. Both state and pixel observation environments are available. The motivation of this environment is to easily enable trained agents to play against each other, and also facilitate the training of agents directly in a multi-agent setting, thus adding an extra dimension for evaluating an agent's performance.
+
+### [ stable-retro](https://github.com/MatPoliquin/stable-retro)
+
+Supported fork of gym-retro with additional games, states, scenarios, etc. Open to PRs of additional games, features, and platforms, since gym-retro is no longer maintained.
+
+### [ Unity ML Agents](https://github.com/Unity-Technologies/ml-agents)
+
+Gym wrappers for arbitrary and premade environments with the Unity game engine.
+
+## Classic Environments (board, card, etc. games)
+
+### [ gym-abalone: A two-player abstract strategy board game](https://github.com/towzeur/gym-abalone)
+
+An implementation of the board game Abalone.
+
+### [ gym-spoof](https://github.com/MouseAndKeyboard/gym-spoof)
+
+Spoof, otherwise known as "The 3-coin game", is a multi-agent (2 player), imperfect-information, zero-sum game.
+
+### [ gym-xiangqi: Xiangqi - The Chinese Chess Game](https://github.com/tanliyon/gym-xiangqi)
+
+A reinforcement learning environment of Xiangqi, the Chinese Chess game.
+
+### [ RubiksCubeGym](https://github.com/DoubleGremlin181/RubiksCubeGym)
+
+The RubiksCubeGym package provides environments for twisty puzzles with multiple reward functions to help simulate the methods used by humans.
+
+### [ GymGo](https://github.com/aigagror/GymGo)
+
+The board game Go, also known as Weiqi. The game that was famously conquered by AlphaGo.
+
+## Robotics Environments
+
+### [ GymFC: A flight control tuning and training framework](https://github.com/wil3/gymfc/)
+
+GymFC is a modular framework for synthesizing neuro-flight controllers. The architecture integrates digital twinning concepts to provide seamless transfer of trained policies to hardware. The OpenAI environment has been used to generate policies for the world's first open-source neural network flight control firmware [Neuroflight](https://github.com/wil3/neuroflight).
+
+### [ gym-gazebo](https://github.com/erlerobot/gym-gazebo/)
+
+gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and
+rendering tool.
+
+### [ gym-goddard: Goddard's Rocket Problem](https://github.com/osannolik/gym-goddard)
+
+An environment for simulating the classical optimal control problem where the thrust of a vertically ascending rocket shall be determined such that it reaches the maximum possible altitude, while being subject to varying aerodynamic drag, gravity and mass.
+
+### [ gym-jiminy: training Robots in Jiminy](https://github.com/Wandercraft/jiminy)
+
+gym-jiminy presents an extension of the initial OpenAI gym for robotics using Jiminy, an extremely fast and light weight simulator for poly-articulated systems using Pinocchio for physics evaluation and Meshcat for web-based 3D rendering.
+
+### [ gym-miniworld](https://github.com/maximecb/gym-miniworld)
+
+MiniWorld is a minimalistic 3D interior environment simulator for reinforcement learning & robotics research. It can be used to simulate environments with rooms, doors, hallways and various objects (eg: office and home environments, mazes). MiniWorld can be seen as an alternative to VizDoom or DMLab. It is written 100% in Python and designed to be easily modified or extended.
+
+### [ gym-pybullet-drones](https://github.com/JacopoPan/gym-pybullet-drones)
+
+A simple environment using [PyBullet](https://github.com/bulletphysics/bullet3) to simulate the dynamics of a [Bitcraze Crazyflie 2.x](https://www.bitcraze.io/documentation/hardware/crazyflie_2_1/crazyflie_2_1-datasheet.pdf) nanoquadrotor.
+
+### [ MarsExplorer](https://github.com/dimikout3/MarsExplorer)
+
+Mars Explorer is an openai-gym compatible environment designed and developed as an initial endeavor to bridge the gap between powerful Deep Reinforcement Learning methodologies and the problem of exploration/coverage of an unknown terrain.
+
+### [ panda-gym ](https://github.com/qgallouedec/panda-gym/)
+
+PyBullet based simulations of a robotic arm moving objects.
+
+### [ PyBullet Robotics Environments](https://docs.google.com/document/d/10sXEhzFRSnvFcl3XxNGhnD4N2SedqwdAvK3dsihxVUA/edit#heading=h.wz5to0x8kqmr)
+
+3D physics environments like the Mujoco environments but uses the Bullet physics engine and does not require a commercial license. Works on Mac/Linux/Windows.
+
+### [ robo-gym](https://github.com/jr-robotics/robo-gym)
+
+robo-gym provides a collection of reinforcement learning environments involving robotic tasks applicable in both simulation and real world robotics.
+
+### [ Offworld-gym](https://github.com/offworld-projects/offworld-gym)
+
+Gym environments that let you control physical robots in a laboratory via the internet.
+
+## Autonomous Driving and Traffic Control Environments
+
+### [ gym-carla](https://github.com/cjy1992/gym-carla)
+
+gym-carla provides a gym wrapper for the [CARLA simulator](http://carla.org/), which is a realistic 3D simulator for autonomous driving research. The environment includes a virtual city with several surrounding vehicles running around. Multiple sources of observations are provided for the ego vehicle, such as a front-view camera image, lidar point cloud, and bird's-eye view semantic mask. Several applications have been developed based on this wrapper, such as deep reinforcement learning for end-to-end autonomous driving.
+
+### [ gym-duckietown](https://github.com/duckietown/gym-duckietown)
+
+A lane-following simulator built for the [Duckietown](http://duckietown.org/) project (small-scale self-driving car course).
+
+### [ gym-electric-motor](https://github.com/upb-lea/gym-electric-motor)
+
+An environment for simulating a wide variety of electric drives taking into account different types of electric motors and converters. Control schemes can be continuous, yielding a voltage duty cycle, or discrete, determining converter switching states directly.
+
+### [ highway-env](https://github.com/eleurent/highway-env)
+
+An environment for behavioural planning in autonomous driving, with an emphasis on high-level perception and decision rather than low-level sensing and control. The difficulty of the task lies in understanding the social interactions with other drivers, whose behaviours are uncertain. Several scenes are proposed, such as highway, merge, intersection and roundabout.
+
+### [ LongiControl](https://github.com/dynamik1703/gym_longicontrol)
+
+An environment for the stochastic longitudinal control of an electric vehicle. It is intended to be a descriptive and comprehensible example for a continuous real-world problem within the field of autonomous driving.
+
+### [ sumo-rl](https://github.com/LucasAlegre/sumo-rl)
+
+Gym wrapper for various environments in the Sumo traffic simulator
+
+### [ CommonRoad-RL](https://commonroad.in.tum.de/commonroad-rl)
+
+A Gym for solving motion planning problems for various traffic scenarios compatible with [CommonRoad benchmarks](https://commonroad.in.tum.de/scenarios), which provides configurable rewards, action spaces, and observation spaces.
+
+
+## Multi Agents
+
+### [PettingZoo](https://github.com/Farama-Foundation/PettingZoo)
+PettingZoo is a Python library for conducting research in multi-agent reinforcement learning, akin to a multi-agent version of Gym.
+
+
+## Other Environments
+
+### [ anomalous_rl_envs](https://github.com/modanesh/anomalous_rl_envs)
+
+A set of environments from control tasks: Acrobot, CartPole, and LunarLander with various types of anomalies injected into them. It could be very useful to study the behavior and robustness of a policy.
+
+### [ CARL](https://github.com/automl/CARL)
+
+Configurable reinforcement learning environments for testing generalization, e.g. CartPole with variable pole lengths or Brax robots with different ground frictions.
+
+### [ CompilerGym](https://github.com/facebookresearch/CompilerGym)
+
+Reinforcement learning environments for compiler optimization tasks, such as LLVM phase ordering, GCC flag tuning, and CUDA loop nest code generation.
+
+### [ DACBench](https://github.com/automl/DACBench)
+
+Environments for hyperparameter configuration using RL. Includes cheap surrogate benchmarks as well as real-world algorithms from e.g. AI Planning, Evolutionary Computation and Deep Learning.
+
+### [ Gridworld](https://github.com/addy1997/Gridworld)
+
+The Gridworld package provides grid-based environments to help simulate the results for model-based reinforcement learning algorithms. The initial release supports single-agent systems only. Some features in this version of the software have become obsolete; new features, such as a windy-grid environment, are being added.
+
+### [ gym-adserve](https://github.com/falox/gym-adserver)
+
+An environment that implements a typical [multi-armed bandit scenario](https://en.wikipedia.org/wiki/Multi-armed_bandit) where an [ad server](https://en.wikipedia.org/wiki/Ad_serving) must select the best advertisement to be displayed in a web page. Some example agents are included: Random, epsilon-Greedy, Softmax, and UCB1.
+
+### [ gym-algorithmic](https://github.com/Rohan138/gym-algorithmic)
+
+These are a variety of algorithmic tasks, such as learning to copy a sequence, present in Gym prior to Gym 0.20.0.
+
+### [ gym-anytrading](https://github.com/AminHP/gym-anytrading)
+
+AnyTrading is a collection of OpenAI Gym environments for reinforcement learning-based trading algorithms with a great focus on simplicity, flexibility, and comprehensiveness.
+
+### [ gym-autokey](https://github.com/Flunzmas/gym-autokey)
+
+An environment for automated rule-based deductive program verification in the KeY verification system.
+
+### [ gym-ccc](https://github.com/acxz/gym-ccc)
+
+Environments that extend gym's classic control and add many new features including continuous action spaces.
+
+### [ gym-cellular-automata](https://github.com/elbecerrasoto/gym-cellular-automata)
+
+Environments where the agent interacts with _Cellular Automata_ by changing the states of its cells.
+
+### [ gym-games](https://github.com/qlan3/gym-games)
+
+Gym implementations of the MinAtar games, various PyGame Learning Environment games, and various custom exploration games
+
+### [ gym-inventory](https://github.com/paulhendricks/gym-inventory)
+
+gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems.
+
+### [ gym-maze](https://github.com/tuzzer/gym-maze/)
+
+A simple 2D maze environment where an agent finds its way from the start position to the goal.
+
+### [ gym-mtsim](https://github.com/AminHP/gym-mtsim)
+
+MtSim is a general-purpose, flexible, and easy-to-use simulator alongside an OpenAI Gym trading environment for MetaTrader 5 trading platform.
+
+### [ gym-legacy-toytext](https://github.com/Rohan138/gym-legacy-toytext)
+
+These are the unused toy-text environments present in Gym prior to Gym 0.20.0.
+
+### [ gym-riverswim](https://github.com/erfanMhi/gym-riverswim)
+
+A simple environment for benchmarking reinforcement learning exploration techniques in a simplified setting. Hard exploration.
+
+### [ gym-recsys](https://github.com/zuoxingdong/gym-recsys)
+
+This package describes an OpenAI Gym interface for creating a simulation environment of reinforcement learning-based recommender systems (RL-RecSys). The design strives for simple and flexible APIs to support novel research.
+
+### [ gym-sokoban](https://github.com/mpSchrader/gym-sokoban)
+
+2D Transportation Puzzles. The environment consists of transportation puzzles in which the player's goal is to push all boxes onto the warehouse's storage locations. The advantage of the environment is that it generates a new random level every time it is initialized or reset, which prevents overfitting to predefined levels.
+
+### [ math-prog-synth-env](https://github.com/JohnnyYeeee/math_prog_synth_env)
+
+In our paper ["A Reinforcement Learning Environment for Mathematical Reasoning via Program Synthesis"](https://arxiv.org/abs/2107.07373), we convert the DeepMind Mathematics Dataset into an RL environment based around program synthesis.
+
+### [ NASGym](https://github.com/gomerudo/nas-env)
+
+The environment is fully-compatible with the OpenAI baselines and exposes a NAS environment following the Neural Structure Code of [BlockQNN: Efficient Block-wise Neural Network Architecture Generation](https://arxiv.org/abs/1808.05584). Under this setting, a Neural Network (i.e. the state for the reinforcement learning agent) is modeled as a list of NSCs, an action is the addition of a layer to the network, and the reward is the accuracy after the early-stop training. The datasets considered so far are the CIFAR-10 dataset (available by default) and the meta-dataset (has to be manually downloaded as specified in [this repository](https://github.com/gomerudo/meta-dataset)).
+
+### [ NLPGym: A toolkit to develop RL agents to solve NLP tasks](https://github.com/rajcscw/nlp-gym)
+
+[NLPGym](https://arxiv.org/pdf/2011.08272v1.pdf) provides interactive environments for standard NLP tasks such as sequence tagging, question answering, and sequence classification. Users can easily customize the tasks with their own datasets, observations, featurizers and reward functions.
+
+### [ Obstacle Tower](https://github.com/Unity-Technologies/obstacle-tower-env)
+
+3D procedurally generated tower where you have to climb to the highest level possible
+
+### [ openmodelica-microgrid-gym](https://github.com/upb-lea/openmodelica-microgrid-gym)
+
+The OpenModelica Microgrid Gym (OMG) package is a software toolbox for the simulation and control optimization of microgrids based on energy conversion by power electronic converters.
+
+### [ osim-rl](https://github.com/stanfordnmbl/osim-rl)
+
+Musculoskeletal Models in OpenSim. A human musculoskeletal model and a physics-based simulation environment where you can synthesize physically and physiologically accurate motion. One of the environments built in this framework is a competition environment for a NIPS 2017 challenge.
+
+### [ PGE: Parallel Game Engine](https://github.com/222464/PGE)
+
+PGE is a FOSS 3D engine for AI simulations, and can interoperate with the Gym. Contains environments with modern 3D graphics, and uses Bullet for physics.
+
+### [ QASGym](https://github.com/qdevpsi3/quantum-arch-search)
+
+This is a list of environments for quantum architecture search following the description in [Quantum Architecture Search via Deep Reinforcement Learning](https://arxiv.org/abs/2104.07715). The agent designs the quantum circuit by taking actions in the environment. Each action corresponds to a gate applied on some wires. The goal is to build a circuit U that generates the target n-qubit quantum state, which belongs to the environment and is hidden from the agent. The circuits are built using [Google QuantumAI Cirq](https://quantumai.google/cirq).
+
+### [ safe-control-gym](https://github.com/utiasDSL/safe-control-gym)
+
+PyBullet based CartPole and Quadrotor environments—with [CasADi](https://web.casadi.org) (symbolic) *a priori* dynamics and constraints—for learning-based control and model-based reinforcement learning.
+
+### [ VirtualTaobao](https://github.com/eyounx/VirtualTaobao/)
+
+An environment for online recommendation, where customers are learned from Taobao.com, one of the world's largest e-commerce platforms.
+
+### [ mo-gym](https://github.com/LucasAlegre/mo-gym)
+
+Multi-objective RL (MORL) gym environments, where the reward is a numpy array of different (possibly conflicting) objectives.
+
+### [ABIDES-Gym](https://github.com/jpmorganchase/abides-jpmc-public)
+
+ABIDES (Agent Based Interactive Discrete Event Simulator) is a message-based, multi-agent, discrete-event simulator. It enables simulating complex multi-agent systems for different domains. ABIDES has already supported work in [equity markets simulation](https://arxiv.org/abs/1904.12066) and [federated learning](https://dl.acm.org/doi/abs/10.1145/3383455.3422562).
+
+[ABIDES-Gym](https://arxiv.org/abs/2110.14771) (ACM-ICAIF21 publication) is a new wrapper built around ABIDES that enables using ABIDES simulator as an Open AI Gym environment for the training of Reinforcement Learning algorithms.
+
+We apply this work by specifically using the markets extension of ABIDES/ABIDES-Markets and developing two benchmark financial market Gym environments for training daily investor and execution agents. As a result, these two environments describe classic financial problems with a complex interactive market behavior response to the experimental agent's action.
+
+### [gym-saturation](https://github.com/inpefess/gym-saturation)
+
+An environment for guiding automated theorem provers based on saturation algorithms (e.g. [Vampire](https://github.com/vprover/vampire)).
+
+### [ShinRL](https://github.com/omron-sinicx/ShinRL/)
+
+ShinRL: A Library for Evaluating RL Algorithms from Theoretical and Practical Perspectives (Deep RL Workshop 2021)
+
+### [racing-rl](https://github.com/luigiberducci/racing-rl/)
+
+reinforcement learning for f1tenth racing
+
+### [go-explore](https://github.com/qgallouedec/go-explore/)
+
+Unofficial implementation of the Go-Explore algorithm presented in [First return then explore](https://arxiv.org/abs/2004.12919) based on [stable-baselines3](https://github.com/DLR-RM/stable-baselines3).
+
+### [tmrl](https://github.com/trackmania-rl/tmrl/)
+
+TrackMania 2020 through RL
+
+### [racing_dreamer](https://github.com/CPS-TUWien/racing_dreamer/)
+
+Latent Imagination Facilitates Zero-Shot Transfer in Autonomous Racing
+
+### [racecar_gym](https://github.com/axelbr/racecar_gym/)
+
+A gym environment for a miniature racecar using the pybullet physics engine.
+
+### [jiminy](https://github.com/duburcqa/jiminy/)
+
+Jiminy: a fast and portable Python/C++ simulator of poly-articulated systems with OpenAI Gym interface for reinforcement learning
+
+### [evogym-design-tool](https://github.com/EvolutionGym/evogym-design-tool/)
+
+Design tool for creating Evolution Gym environments.
+
+### [l2r](https://github.com/learn-to-race/l2r/)
+
+Open-source reinforcement learning environment for autonomous racing.
+
+### [gym_torcs](https://github.com/ugo-nama-kun/gym_torcs/)
+
+Gym-TORCS is a reinforcement learning (RL) environment in the TORCS domain with an OpenAI-gym-like interface. TORCS is an open-source realistic car racing simulator recently used as an RL benchmark task in several AI studies.
+
+### [mobile-env](https://github.com/stefanbschneider/mobile-env/)
+
+An open, minimalist Gym environment for autonomous coordination in wireless mobile networks.
+
+### [gym-softrobot](https://github.com/skim0119/gym-softrobot/)
+
+Softrobotics environment package for OpenAI Gym
+
+### [PyElastica](https://github.com/GazzolaLab/PyElastica/)
+
+Python implementation of Elastica, an open-source software for the simulation of assemblies of slender, one-dimensional structures using Cosserat Rod theory.
+
+### [tuxkart-ai](https://github.com/notjedi/tuxkart-ai/)
+
+RL agent for the SuperTuxKart game.
+
+### [ostrichrl](https://github.com/vittorione94/ostrichrl/)
+
+This is the repository accompanying the paper [OstrichRL: A Musculoskeletal Ostrich Simulation to Study Bio-mechanical Locomotion](https://arxiv.org/abs/2112.06061).
+
+### [quadruped-gym](https://github.com/dtch1997/quadruped-gym/)
+
+An OpenAI gym environment for the training of legged robots
+
+### [Pogo-Stick-Jumping](https://github.com/asalbright/Pogo-Stick-Jumping/)
+
+OpenAI gym environment, testing and evaluation.
+
+### [evogym](https://github.com/EvolutionGym/evogym/)
+
+A large-scale benchmark for co-optimizing the design and control of soft robots, as seen in NeurIPS 2021.
+
+### [iGibson](https://github.com/StanfordVL/iGibson/)
+
+A Simulation Environment to train Robots in Large Realistic Interactive Scenes
+
+### [SnakeRL](https://github.com/tboulet/SnakeRL/)
+
+Repo for Snake RL
+
+### [starship-landing-gym](https://github.com/Armandpl/starship-landing-gym/)
+
+A Gym env for propulsive rocket landing.
+
+### [CompilerGym](https://github.com/facebookresearch/CompilerGym/)
+
+Reinforcement learning environments for compiler and program optimization tasks
+
+### [RaveForce](https://github.com/chaosprint/RaveForce/)
+
+RaveForce - An OpenAI Gym style toolkit for music generation experiments.
+
+### [gym-line-follower](https://github.com/nplan/gym-line-follower/)
+
+Line follower robot simulator environment for Open AI Gym.
+
+### [DexterousHands](https://github.com/PKU-MARL/DexterousHands/)
+
+This is a library that provides dual dexterous hand manipulation tasks through Isaac Gym
+
+### [OmniIsaacGymEnvs](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs/)
+
+Reinforcement Learning Environments for Omniverse Isaac Gym
+
+### [border](https://github.com/taku-y/border/)
+
+A reinforcement learning library in Rust
+
+### [SpaceRobotEnv](https://github.com/Tsinghua-Space-Robot-Learning-Group/SpaceRobotEnv/)
+
+A gym environment designed for free-floating space robot control based on the MuJoCo platform.
+
+### [gymnax](https://github.com/RobertTLange/gymnax/)
+
+RL Environments in JAX 🌍
diff --git a/docs/environments/toy_text/blackjack.md b/docs/environments/toy_text/blackjack.md
new file mode 100644
index 000000000..d284196c3
--- /dev/null
+++ b/docs/environments/toy_text/blackjack.md
@@ -0,0 +1,82 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Blackjack
+firstpage:
+---
+
+# Blackjack
+
+```{figure} ../../_static/videos/toy_text/blackjack.gif
+:width: 200px
+:name: blackjack
+```
+
+This environment is part of the Toy Text environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(2) |
+| Observation Space | Tuple(Discrete(32), Discrete(11), Discrete(2)) |
+| Import | `gymnasium.make("Blackjack-v1")` |
+
+
+Blackjack is a card game where the goal is to beat the dealer by obtaining cards
+that sum closer to 21 (without going over 21) than the dealer's cards.
+
+### Description
+Card Values:
+
+- Face cards (Jack, Queen, King) have a point value of 10.
+- Aces can either count as 11 (called a 'usable ace') or 1.
+- Numerical cards (2-9) have a value equal to their number.
+
+This game is played with an infinite deck (or with replacement).
+The game starts with the dealer having one face up and one face down card,
+while the player has two face up cards.
+
+The player can request additional cards (hit, action=1) until they decide to stop (stick, action=0)
+or exceed 21 (bust, immediate loss).
+After the player sticks, the dealer reveals their facedown card, and draws
+until their sum is 17 or greater. If the dealer goes bust, the player wins.
+If neither the player nor the dealer busts, the outcome (win, lose, draw) is
+decided by whose sum is closer to 21.
+
+### Action Space
+There are two actions: stick (0), and hit (1).
+
+### Observation Space
+The observation consists of a 3-tuple containing: the player's current sum,
+the value of the dealer's one showing card (1-10 where 1 is ace),
+and whether the player holds a usable ace (0 or 1).
+
+This environment corresponds to the version of the blackjack problem
+described in Example 5.1 in Reinforcement Learning: An Introduction
+by Sutton and Barto (http://incompleteideas.net/book/the-book-2nd.html).
+
+### Rewards
+- win game: +1
+- lose game: -1
+- draw game: 0
+- win game with natural blackjack:
+
+ +1.5 (if natural is True)
+
+ +1 (if natural is False)
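+
+For example, a minimal interaction loop (the hit-below-20 policy is purely illustrative):
+
+```
+import gymnasium
+
+env = gymnasium.make("Blackjack-v1")
+obs, info = env.reset()
+terminated = truncated = False
+while not (terminated or truncated):
+    player_sum, dealer_card, usable_ace = obs  # the 3-tuple observation
+    action = 1 if player_sum < 20 else 0       # hit below 20, otherwise stick
+    obs, reward, terminated, truncated, info = env.step(action)
+print(reward)  # +1 (win), -1 (lose) or 0 (draw)
+```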
+
+### Arguments
+
+```
+gymnasium.make('Blackjack-v1', natural=False, sab=False)
+```
+
+`natural=False`: Whether to give an additional reward for
+starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).
+
+`sab=False`: Whether to follow the exact rules outlined in the book by
+Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.
+If the player achieves a natural blackjack and the dealer does not, the player
+will win (i.e. get a reward of +1). The reverse rule does not apply.
+If both the player and the dealer get a natural, it will be a draw (i.e. reward 0).
+
+### Version History
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/toy_text/cliff_walking.md b/docs/environments/toy_text/cliff_walking.md
new file mode 100644
index 000000000..4d35d698a
--- /dev/null
+++ b/docs/environments/toy_text/cliff_walking.md
@@ -0,0 +1,64 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Cliff Walking
+---
+
+# Cliff Walking
+
+```{figure} ../../_static/videos/toy_text/cliff_walking.gif
+:width: 200px
+:name: cliff_walking
+```
+
+This environment is part of the Toy Text environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(4) |
+| Observation Space | Discrete(48) |
+| Import | `gymnasium.make("CliffWalking-v0")` |
+
+
+This is a simple implementation of the Gridworld Cliff
+reinforcement learning task.
+
+Adapted from Example 6.6 (page 106) from [Reinforcement Learning: An Introduction
+by Sutton and Barto](http://incompleteideas.net/book/bookdraft2018jan1.pdf).
+
+With inspiration from:
+[https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py](https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py)
+
+### Description
+The board is a 4x12 matrix, with (using NumPy matrix indexing):
+- [3, 0] as the start at bottom-left
+- [3, 11] as the goal at bottom-right
+- [3, 1..10] as the cliff at bottom-center
+
+If the agent steps on the cliff, it returns to the start.
+An episode terminates when the agent reaches the goal.
+
+### Actions
+There are 4 discrete deterministic actions:
+- 0: move up
+- 1: move right
+- 2: move down
+- 3: move left
+
+### Observations
+There are 3x12 + 1 possible states. In fact, the agent cannot be at the cliff, nor at the goal
+(as this results in the end of the episode).
+What remains are all the positions of the first 3 rows plus the bottom-left cell.
+The observation is simply the current position encoded as a [flattened index](https://numpy.org/doc/stable/reference/generated/numpy.unravel_index.html).
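+
+For example (illustrative only), the index can be mapped back to grid coordinates with NumPy:
+
+```
+import gymnasium
+import numpy as np
+
+env = gymnasium.make("CliffWalking-v0")
+obs, info = env.reset()
+print(obs)                             # 36: the start state [3, 0] at the bottom-left
+print(np.unravel_index(obs, (4, 12)))  # (3, 0) as (row, col)
+```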
+
+### Reward
+Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward.
+
+### Arguments
+
+```
+gymnasium.make('CliffWalking-v0')
+```
+
+### Version History
+- v0: Initial version release
diff --git a/docs/environments/toy_text/frozen_lake.md b/docs/environments/toy_text/frozen_lake.md
new file mode 100644
index 000000000..47acfa7fc
--- /dev/null
+++ b/docs/environments/toy_text/frozen_lake.md
@@ -0,0 +1,99 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Frozen Lake
+---
+
+# Frozen Lake
+
+```{figure} ../../_static/videos/toy_text/frozen_lake.gif
+:width: 200px
+:name: frozen_lake
+```
+
+This environment is part of the Toy Text environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(4) |
+| Observation Space | Discrete(16) |
+| Import | `gymnasium.make("FrozenLake-v1")` |
+
+
+Frozen lake involves crossing a frozen lake from Start(S) to Goal(G) without falling into any Holes(H)
+by walking over the Frozen(F) lake.
+The agent may not always move in the intended direction due to the slippery nature of the frozen lake.
+
+
+### Action Space
+The agent takes a 1-element vector for actions.
+The action space is `(dir)`, where `dir` decides the direction to move in, which can be:
+
+- 0: LEFT
+- 1: DOWN
+- 2: RIGHT
+- 3: UP
+
+### Observation Space
+The observation is a value representing the agent's current position as
+current_row * ncols + current_col (where both the row and col start at 0).
+For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15.
+The number of possible observations is dependent on the size of the map.
+For example, the 4x4 map has 16 possible observations.
+
+### Rewards
+
+Reward schedule:
+- Reach goal(G): +1
+- Reach hole(H): 0
+- Reach frozen(F): 0
+
+### Arguments
+
+```
+gymnasium.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True)
+```
+
+`desc`: Used to specify custom map for frozen lake. For example,
+
+ desc=["SFFF", "FHFH", "FFFH", "HFFG"].
+
+ A random generated map can be specified by calling the function `generate_random_map`. For example,
+
+ ```
+ from gymnasium.envs.toy_text.frozen_lake import generate_random_map
+
+ gymnasium.make('FrozenLake-v1', desc=generate_random_map(size=8))
+ ```
+
+`map_name`: ID to use any of the preloaded maps.
+
+ "4x4":[
+ "SFFF",
+ "FHFH",
+ "FFFH",
+ "HFFG"
+ ]
+
+ "8x8": [
+ "SFFFFFFF",
+ "FFFFFFFF",
+ "FFFHFFFF",
+ "FFFFFHFF",
+ "FFFHFFFF",
+ "FHHFFFHF",
+ "FHFFHFHF",
+ "FFFHFFFG",
+ ]
+
+`is_slippery`: True/False. If True, the agent will move in the intended direction with
+a probability of 1/3; otherwise it will move in either perpendicular direction with
+an equal probability of 1/3 in both directions.
+
+ For example, if action is left and is_slippery is True, then:
+ - P(move left)=1/3
+ - P(move up)=1/3
+ - P(move down)=1/3
+
+### Version History
+* v1: Bug fixes to rewards
+* v0: Initial versions release (1.0.0)
diff --git a/docs/environments/toy_text/index.html b/docs/environments/toy_text/index.html
new file mode 100644
index 000000000..84adffbae
--- /dev/null
+++ b/docs/environments/toy_text/index.html
@@ -0,0 +1,29 @@
+
+
+
+
\ No newline at end of file
diff --git a/docs/environments/toy_text/index.md b/docs/environments/toy_text/index.md
new file mode 100644
index 000000000..eb7cd344b
--- /dev/null
+++ b/docs/environments/toy_text/index.md
@@ -0,0 +1,25 @@
+---
+firstpage:
+lastpage:
+---
+
+## Toy Text
+
+```{toctree}
+:hidden:
+
+blackjack.md
+taxi.md
+cliff_walking.md
+frozen_lake.md
+```
+
+```{raw} html
+ :file: index.html
+```
+
+All toy text environments were created by us using native Python libraries such as StringIO.
+
+These environments are designed to be extremely simple, with small discrete state and action spaces, and hence easy to learn. As a result, they are suitable for debugging implementations of reinforcement learning algorithms.
+
+All environments are configurable via arguments specified in each environment's documentation.
diff --git a/docs/environments/toy_text/taxi.md b/docs/environments/toy_text/taxi.md
new file mode 100644
index 000000000..3365d2682
--- /dev/null
+++ b/docs/environments/toy_text/taxi.md
@@ -0,0 +1,116 @@
+---
+AUTOGENERATED: DO NOT EDIT FILE DIRECTLY
+title: Taxi
+lastpage:
+---
+
+# Taxi
+
+```{figure} ../../_static/videos/toy_text/taxi.gif
+:width: 200px
+:name: taxi
+```
+
+This environment is part of the Toy Text environments. Please read that page first for general information.
+
+| | |
+|---|---|
+| Action Space | Discrete(6) |
+| Observation Space | Discrete(500) |
+| Import | `gymnasium.make("Taxi-v3")` |
+
+
+The Taxi Problem
+from "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition"
+by Tom Dietterich
+
+### Description
+There are four designated locations in the grid world indicated by R(ed),
+G(reen), Y(ellow), and B(lue). When the episode starts, the taxi starts off
+at a random square and the passenger is at a random location. The taxi
+drives to the passenger's location, picks up the passenger, drives to the
+passenger's destination (another one of the four specified locations), and
+then drops off the passenger. Once the passenger is dropped off, the episode ends.
+
+Map:
+
+ +---------+
+ |R: | : :G|
+ | : | : : |
+ | : : : : |
+ | | : | : |
+ |Y| : |B: |
+ +---------+
+
+### Actions
+There are 6 discrete deterministic actions:
+- 0: move south
+- 1: move north
+- 2: move east
+- 3: move west
+- 4: pickup passenger
+- 5: drop off passenger
+
+### Observations
+There are 500 discrete states since there are 25 taxi positions, 5 possible
+locations of the passenger (including the case when the passenger is in the
+taxi), and 4 destination locations.
+
+Note that there are 400 states that can actually be reached during an
+episode. The missing states correspond to situations in which the passenger
+is at the same location as their destination, as this typically signals the
+end of an episode. Four additional states can be observed right after a
+successful episode, when both the passenger and the taxi are at the destination.
+This gives a total of 404 reachable discrete states.
+
+Each state is represented by the tuple:
+(taxi_row, taxi_col, passenger_location, destination)
+
+An observation is an integer that encodes the corresponding state.
+The state tuple can then be decoded with the "decode" method.
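+
+For illustration, a minimal sketch of decoding an observation back into its state tuple (assuming access to the unwrapped environment's ``decode`` method):
+
+```python
+import gymnasium
+
+env = gymnasium.make("Taxi-v3")
+obs, info = env.reset(seed=42)
+
+# decode yields (taxi_row, taxi_col, passenger_location, destination)
+taxi_row, taxi_col, passenger_location, destination = env.unwrapped.decode(obs)
+print(taxi_row, taxi_col, passenger_location, destination)
+```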
+
+Passenger locations:
+- 0: R(ed)
+- 1: G(reen)
+- 2: Y(ellow)
+- 3: B(lue)
+- 4: in taxi
+
+Destinations:
+- 0: R(ed)
+- 1: G(reen)
+- 2: Y(ellow)
+- 3: B(lue)
+
+### Info
+
+``step`` and ``reset`` will return an info dictionary that contains "p" and "action_mask", containing
+the probability of the resulting state and a mask of which actions will result in a change of state, which can be used to speed up training.
+
+As Taxi's initial state is stochastic, the "p" key returned by ``reset`` should represent the probability
+of sampling that state; however, this value is currently bugged and always reports 1.0 (a fix is planned).
+As steps are deterministic, the "p" returned by ``step`` is the transition probability, which is always 1.0.
+
+In some states, taking an action will have no effect on the state of the agent.
+As of v0.25.0, ``info["action_mask"]`` contains an ``np.ndarray`` with one entry per action, specifying
+whether that action will change the state.
+
+To sample an action that modifies the state, use ``action = env.action_space.sample(info["action_mask"])``.
+With a Q-value based algorithm, the mask can instead restrict the greedy choice to valid actions; note that
+``np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])`` returns an index into the masked subset,
+which must be mapped back to the original action id, as in the sketch below.
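+
+A minimal sketch of both approaches (``q_values`` is a hypothetical, untrained Q-table used only for illustration):
+
+```python
+import numpy as np
+import gymnasium
+
+env = gymnasium.make("Taxi-v3")
+obs, info = env.reset(seed=42)
+
+# 1) Random exploration restricted to actions that change the state
+action = env.action_space.sample(info["action_mask"])
+
+# 2) Greedy choice from a (hypothetical) Q-table, restricted to valid actions
+q_values = np.zeros((env.observation_space.n, env.action_space.n))
+valid_actions = np.where(info["action_mask"] == 1)[0]
+action = valid_actions[np.argmax(q_values[obs, valid_actions])]
+```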
+
+### Rewards
+- -1 per step unless other reward is triggered.
+- +20 delivering passenger.
+- -10 executing "pickup" and "drop-off" actions illegally.
+
+### Arguments
+
+```
+gymnasium.make('Taxi-v3')
+```
+
+### Version History
+* v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
+* v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
+* v1: Remove (3,2) from locs, add passidx<4 check
+* v0: Initial version release
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 000000000..f6d007db7
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,77 @@
+---
+hide-toc: true
+firstpage:
+lastpage:
+---
+
+## Gymnasium is a standard API for reinforcement learning, and a diverse collection of reference environments
+
+
+```{figure} _static/videos/box2d/lunar_lander_continuous.gif
+ :alt: Lunar Lander
+ :width: 500
+```
+
+**The Gymnasium interface is simple, pythonic, and capable of representing general RL problems:**
+
+```{code-block} python
+
+import gymnasium
+env = gymnasium.make("LunarLander-v2", render_mode="human")
+observation, info = env.reset(seed=42)
+for _ in range(1000):
+ action = policy(observation) # User-defined policy function
+ observation, reward, terminated, truncated, info = env.step(action)
+
+ if terminated or truncated:
+ observation, info = env.reset()
+env.close()
+```
+
+```{toctree}
+:hidden:
+:caption: Introduction
+
+content/basic_usage
+```
+
+```{toctree}
+:hidden:
+:caption: API
+
+api/core
+api/spaces
+api/spaces_utils
+api/wrappers
+api/vector
+api/utils
+```
+
+```{toctree}
+:hidden:
+:caption: Environments
+
+environments/atari/index
+environments/mujoco/index
+environments/toy_text/index
+environments/classic_control/index
+environments/box2d/index
+environments/third_party_environments/index
+```
+
+```{toctree}
+:hidden:
+:caption: Tutorials
+
+content/environment_creation
+content/vectorising
+```
+
+```{toctree}
+:hidden:
+:caption: Development
+
+Github
+Donate
+
+```
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 000000000..8084272b4
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 000000000..96549120c
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,5 @@
+sphinx
+myst-parser
+furo
+moviepy
+pygame
\ No newline at end of file
diff --git a/docs/scripts/gen_atari_table.py b/docs/scripts/gen_atari_table.py
new file mode 100644
index 000000000..20b4e2822
--- /dev/null
+++ b/docs/scripts/gen_atari_table.py
@@ -0,0 +1,109 @@
+import tabulate
+from tqdm import tqdm
+
+import gymnasium
+
+
+def shortened_repr(lst):
+    """Return a compact markdown representation of a list of distinct ints.
+
+    Consecutive runs are collapsed, e.g. [0, 1, 2, 3, 4] -> `[0, ..., 4]`.
+    """
+    assert all(isinstance(item, int) for item in lst)
+    assert len(set(lst)) == len(lst)
+    lst = sorted(lst)
+
+    # Fully consecutive run: [a, a+1, ..., b]
+    if lst[-1] - lst[0] == len(lst) - 1 and len(lst) > 3:
+        return f"`[{lst[0]}, ..., {lst[-1]}]`"
+    # Consecutive run with a single trailing outlier: [a, ..., b, c]
+    elif len(lst) > 3 and lst[-2] - lst[0] == len(lst) - 2:
+        return f"`[{lst[0]}, ..., {lst[-2]}, {lst[-1]}]`"
+    return f"`{str(lst)}`"
+
+
+def to_gymnasium_spelling(game):
+    """Convert a snake_case ALE game name to Gymnasium's CamelCase spelling."""
+    parts = game.split("_")
+    return "".join([part.capitalize() for part in parts])
+
+
+atari_envs = [
+ "adventure",
+ "air_raid",
+ "alien",
+ "amidar",
+ "assault",
+ "asterix",
+ "asteroids",
+ "atlantis",
+ "bank_heist",
+ "battle_zone",
+ "beam_rider",
+ "berzerk",
+ "bowling",
+ "boxing",
+ "breakout",
+ "carnival",
+ "centipede",
+ "chopper_command",
+ "crazy_climber",
+ "defender",
+ "demon_attack",
+ "double_dunk",
+ "elevator_action",
+ "enduro",
+ "fishing_derby",
+ "freeway",
+ "frostbite",
+ "gopher",
+ "gravitar",
+ "hero",
+ "ice_hockey",
+ "jamesbond",
+ "journey_escape",
+ "kangaroo",
+ "krull",
+ "kung_fu_master",
+ "montezuma_revenge",
+ "ms_pacman",
+ "name_this_game",
+ "phoenix",
+ "pitfall",
+ "pong",
+ "pooyan",
+ "private_eye",
+ "qbert",
+ "riverraid",
+ "road_runner",
+ "robotank",
+ "seaquest",
+ "skiing",
+ "solaris",
+ "space_invaders",
+ "star_gunner",
+ "tennis",
+ "time_pilot",
+ "tutankham",
+ "up_n_down",
+ "venture",
+ "video_pinball",
+ "wizard_of_wor",
+ "yars_revenge",
+ "zaxxon",
+]
+
+
+header = ["Environment", "Valid Modes", "Valid Difficulties", "Default Mode"]
+rows = []
+
+for game in tqdm(atari_envs):
+ env = gymnasium.make(f"ALE/{to_gymnasium_spelling(game)}-v5")
+ valid_modes = env.unwrapped.ale.getAvailableModes()
+ valid_difficulties = env.unwrapped.ale.getAvailableDifficulties()
+ difficulty = env.unwrapped.ale.cloneState().getDifficulty()
+ assert difficulty == 0, difficulty
+ rows.append(
+ [
+ to_gymnasium_spelling(game),
+ shortened_repr(valid_modes),
+ shortened_repr(valid_difficulties),
+ f"`{env.unwrapped.ale.cloneState().getCurrentMode()}`",
+ ]
+ )
+
+
+print(tabulate.tabulate(rows, headers=header, tablefmt="github"))
diff --git a/docs/scripts/gen_envs_display.py b/docs/scripts/gen_envs_display.py
new file mode 100644
index 000000000..0873ac85e
--- /dev/null
+++ b/docs/scripts/gen_envs_display.py
@@ -0,0 +1,183 @@
+import sys
+
+all_envs = [
+ {
+ "id": "mujoco",
+ "list": [
+ "ant",
+ "half_cheetah",
+ "hopper",
+ "humanoid_standup",
+ "humanoid",
+ "inverted_double_pendulum",
+ "inverted_pendulum",
+ "reacher",
+ "swimmer",
+ "walker2d",
+ ],
+ },
+ {"id": "toy_text", "list": ["blackjack", "frozen_lake"]},
+ {"id": "box2d", "list": ["bipedal_walker", "car_racing", "lunar_lander"]},
+ {
+ "id": "classic_control",
+ "list": [
+ "acrobot",
+ "cart_pole",
+ "mountain_car_continuous",
+ "mountain_car",
+ "pendulum",
+ ],
+ },
+ {
+ "id": "atari",
+ "list": [
+ "adventure",
+ "air_raid",
+ "alien",
+ "amidar",
+ "assault",
+ "asterix",
+ "asteroids",
+ "atlantis",
+ "bank_heist",
+ "battle_zone",
+ "beam_rider",
+ "berzerk",
+ "bowling",
+ "boxing",
+ "breakout",
+ "carnival",
+ "centipede",
+ "chopper_command",
+ "crazy_climber",
+ "defender",
+ "demon_attack",
+ "double_dunk",
+ "elevator_action",
+ "enduro",
+ "fishing_derby",
+ "freeway",
+ "frostbite",
+ "gopher",
+ "gravitar",
+ "hero",
+ "ice_hockey",
+ "jamesbond",
+ "journey_escape",
+ "kangaroo",
+ "krull",
+ "kung_fu_master",
+ "montezuma_revenge",
+ "ms_pacman",
+ "name_this_game",
+ "phoenix",
+ "pitfall",
+ "pong",
+ "pooyan",
+ "private_eye",
+ "qbert",
+ "riverraid",
+ "road_runner",
+ "robotank",
+ "seaquest",
+ "skiing",
+ "solaris",
+ "space_invaders",
+ "star_gunner",
+ "tennis",
+ "time_pilot",
+ "tutankham",
+ "up_n_down",
+ "venture",
+ "video_pinball",
+ "wizard_of_wor",
+ "yars_revenge",
+ "zaxxon",
+ ],
+ },
+]
+
+
+def create_grid_cell(type_id, env_id, base_path):
+ return f"""
+
+