High-Level API #970

Merged · 100 commits · Nov 8, 2023

Changes from 94 commits (100 commits total)
42fc181
Add dev dependencies jsonargparse and docstring_parser
opcode81 Sep 20, 2023
a54aade
Addition of dataclasses based config for scripts, major refactoring
Jul 26, 2023
16ed5fd
Initial high-level interfaces, demonstrated in mujoco_ppo_hl
opcode81 Sep 19, 2023
25c6bbd
Ignore D106: Missing docstring in public nested class
opcode81 Sep 20, 2023
2a1cc6b
Enable ruff setting ignore-init-module-imports
opcode81 Sep 20, 2023
316eb3c
Add SAC high-level interface
opcode81 Sep 20, 2023
997b520
Refactoring, dropping package config
opcode81 Sep 20, 2023
adc3240
Remove LoggerConfig
opcode81 Sep 20, 2023
d26b8cb
Use experiment-specific config in mujoco_sac_hl, adding auto-alpha
opcode81 Sep 20, 2023
8ec4200
Move RLSamplingConfig to separate module config, fixing cyclic import
opcode81 Sep 20, 2023
3fd60f9
Unify PPO configuration objects, use experiment-specific configuration
opcode81 Sep 20, 2023
4d53d34
Ignore Ruff rule RET505, because it sacrifices visual discernability
opcode81 Sep 25, 2023
37dc07e
Add high-level experiment builder interface
opcode81 Sep 21, 2023
367778d
Improve high-level policy parametrisation
opcode81 Sep 25, 2023
6a73938
WandbLogger: Use less restrictive type annotation for config
opcode81 Sep 26, 2023
e993425
Add high-level API support for TD3
opcode81 Sep 26, 2023
38cf982
Disable Ruff rule D205 (blank-line-after-summary)
opcode81 Sep 26, 2023
d4e604b
Move parameter transformation directly into parameter objects,
opcode81 Sep 26, 2023
5bcf514
Add alternative functional interface for environment creation
Sep 27, 2023
78b6dd1
Adapt class naming scheme
opcode81 Sep 27, 2023
acd89fa
Remove parameter transformers from config object state,
opcode81 Sep 27, 2023
cd79cf8
Add A2C high-level API
opcode81 Sep 28, 2023
e0e7349
Add base class BaseActor with method get_preprocess_net for high-leve…
opcode81 Sep 28, 2023
6b6d9ea
Add support for discrete PPO
opcode81 Sep 28, 2023
2671580
Add DDPG high-level API and MuJoCo example
opcode81 Oct 3, 2023
de70147
Add string module from sensAI
opcode81 Oct 3, 2023
ce26e25
Handle ruff complaints in string module
opcode81 Oct 3, 2023
58bd20f
Add logging module
opcode81 Oct 3, 2023
9f0a410
Log full experiment configuration, adding string representations to r…
opcode81 Oct 3, 2023
8f67c2e
Disable numba DEBUG logs
opcode81 Oct 3, 2023
358978c
Add ToStringMixin to further high-level parameter classes
opcode81 Oct 5, 2023
1cba589
Add DQN support in high-level API
opcode81 Oct 5, 2023
b54fcd1
Change high-level DQN interface to expect an actor instead of a critic,
opcode81 Oct 5, 2023
50ac385
Add some basic tests for high-level experiment builder API
opcode81 Oct 5, 2023
d269063
Remove 'RL' prefix from class names
opcode81 Oct 6, 2023
837ff13
Reorder ExperimentBuilder args (EnvFactory first)
opcode81 Oct 6, 2023
a8dc75f
ExperimentBuilder: Allow experiment_config and sampling_config to be …
opcode81 Oct 6, 2023
1243894
Add DistributionFunctionFactory subclasses for discrete/continuous de…
opcode81 Oct 6, 2023
7ed6c1d
Remove obsolete module highlevel.utils
opcode81 Oct 9, 2023
e671632
Make mypy ignore copied util modules string & logging
opcode81 Oct 9, 2023
a161a9c
Improve type annotations, fix type issues and add checks
opcode81 Oct 9, 2023
22dfc4e
Fix type annotations of dist_fn
opcode81 Oct 9, 2023
4e93c12
Remove obsolete configuration files
opcode81 Oct 9, 2023
6bb3abb
Support PG/Reinforce in high-level API
opcode81 Oct 10, 2023
1bb52a6
Simplify critic/agent with optimizer generation
opcode81 Oct 10, 2023
a8ea680
Fix ruff type comparison complaint
opcode81 Oct 10, 2023
73a6d15
Log Environments
opcode81 Oct 10, 2023
383a4a6
Support NPG in high-level API and add example mujoco_npg_hl
opcode81 Oct 10, 2023
7af836b
Support TRPO in high-level API and add example mujoco_trpo_hl
opcode81 Oct 10, 2023
17ef4dd
Support REDQ in high-level API
opcode81 Oct 10, 2023
305b30a
Simplify parameter transformers by applying ParamTransformerChangeValue
opcode81 Oct 10, 2023
799beb7
Support discrete SAC in high-level API
opcode81 Oct 10, 2023
c7d0b6b
Simplify agent factories by making better use of base classes
opcode81 Oct 10, 2023
213e08a
Add method get_output_dim to BaseActor
opcode81 Oct 11, 2023
a8a367c
Support IQN in high-level API
opcode81 Oct 11, 2023
686fd55
Extend tests, fixing some default behaviour
opcode81 Oct 11, 2023
ee3813b
Ignore temp scripts and temp folder
opcode81 Oct 12, 2023
f6d4977
Reify policy persistence, introducing Wold representation
opcode81 Oct 11, 2023
3691ed2
Support obs_rms persistence for MuJoCo by adding a general mechanism
opcode81 Oct 12, 2023
ba80329
Add FileLoggerContext
opcode81 Oct 12, 2023
76e8702
Improve persistence handling
opcode81 Oct 12, 2023
023b33c
Make mypy happy
opcode81 Oct 13, 2023
3bba192
Add experiment result
opcode81 Oct 13, 2023
fc695a5
Use logging to report trainer epoch status
opcode81 Oct 13, 2023
90eaacb
PolicyWrapperFactory: Remove unnecessary input type variable
opcode81 Oct 16, 2023
97e21b5
Remove obsolete mixin, improve class names
opcode81 Oct 16, 2023
4b270ea
Add documentation, improve structure of 'module' package
opcode81 Oct 16, 2023
8304878
Add generalised DQN network representation, adding specialised class …
opcode81 Oct 16, 2023
ae48506
DQNExperimentBuilder: Use IntermediateModuleFactory instead of ActorF…
opcode81 Oct 16, 2023
d84e936
Apply centrally defined callbacks
opcode81 Oct 16, 2023
e63d8d4
Use ToStringMixin in dataclasses to detect recurring objects in large…
opcode81 Oct 17, 2023
ff451f8
Add documentation to parameters, improve factorisation
opcode81 Oct 17, 2023
c7d0cbb
Experiment: Fix return type annotation, remove unused type arguments
opcode81 Oct 18, 2023
80b1b1f
World.restore_path: Add value check
opcode81 Oct 18, 2023
ed06ab7
Handle obs_norm setting in MuJoCo envs
opcode81 Oct 18, 2023
41bd463
Allow to configure activation function in default networks
opcode81 Oct 18, 2023
9c5ee55
Merge remote-tracking branch 'origin/master' into feat/high-level-api
opcode81 Oct 18, 2023
cc6f016
miniblock: Fix type annotation of linear_layer
opcode81 Oct 18, 2023
193be9a
Add 'stdout' to spelling dictionary
opcode81 Oct 18, 2023
bbfad01
Improve docstrings
opcode81 Oct 18, 2023
89ce40e
Docs: Add tianshou.highlevel to docs build via auto-generated .rst files
opcode81 Oct 18, 2023
6cbee18
Change interface of EnvFactory to ensure that configuration
opcode81 Oct 18, 2023
7437131
Fix tianshou.highlevel depending on jsonargparse
opcode81 Oct 19, 2023
f7f2064
ExperimentConfig: Improve docstrings, remove obsolete item 'render'
opcode81 Oct 20, 2023
b5a8915
Revert to simplified environment factory, removing unnecessary config…
opcode81 Oct 24, 2023
58466eb
Keep all ExperimentBuilder tests in one place
opcode81 Oct 24, 2023
dd4a0eb
Fix: Add MujocoEnvObsRmsPersistence only if obs_norm is enabled
opcode81 Oct 24, 2023
96298ea
Add convenient construction mechanisms for Environments
opcode81 Oct 24, 2023
da2194e
Force kwargs in PolicyWrapperFactoryIntrinsicCuriosity init
opcode81 Oct 26, 2023
3cd6dcc
BaseTrainer: Remove info on default values from docstrings
opcode81 Oct 26, 2023
d684dae
Change default number of environments (train=#CPUs, test=1)
opcode81 Oct 26, 2023
c613557
Apply datetime_tag() in high-level examples
opcode81 Oct 26, 2023
86cca8f
Add comment explaining use of _logFormat
opcode81 Oct 26, 2023
a3dbe90
Allow to configure the policy persistence mode, adding a new mode
opcode81 Oct 26, 2023
5952993
Add option to disable file logging
opcode81 Oct 27, 2023
fdb0eba
Depend on sensAI instead of copying its utils (logging, string)
opcode81 Oct 27, 2023
5c8d57a
Fix index error in call to _with_critic_factory_default
opcode81 Nov 6, 2023
7e6d3d6
Rename class ActorCriticModuleOpt -> ActorCriticOpt
opcode81 Nov 6, 2023
ac672f6
Add docstring for ActorFactoryTransientStorageDecorator
opcode81 Nov 6, 2023
dae4000
Revert "Depend on sensAI instead of copying its utils (logging, string)"
opcode81 Nov 7, 2023
6 changes: 5 additions & 1 deletion .gitignore
@@ -151,4 +151,8 @@ wandb/
videos/

# might be needed for IDE plugins that can't read ruff config
.flake8
.flake8

# temporary scripts (for ad-hoc testing), temp folder
/temp
/temp*.py
2 changes: 2 additions & 0 deletions docs/.gitignore
@@ -0,0 +1,2 @@
# auto-generated content
/api/tianshou.highlevel
121 changes: 121 additions & 0 deletions docs/autogen_rst.py
@@ -0,0 +1,121 @@
import logging
import os
import shutil
from pathlib import Path

log = logging.getLogger(os.path.basename(__file__))


def module_template(module_qualname: str) -> str:
    module_name = module_qualname.split(".")[-1]
    title = module_name.replace("_", r"\_")
    return f"""{title}
{"=" * len(title)}

.. automodule:: {module_qualname}
    :members:
    :undoc-members:
"""


def package_template(package_qualname: str) -> str:
    package_name = package_qualname.split(".")[-1]
    title = package_name.replace("_", r"\_")
    return f"""{title}
{"=" * len(title)}

.. automodule:: {package_qualname}
    :members:
    :undoc-members:

.. toctree::
    :glob:

    {package_name}/*
"""


def index_template(package_name: str) -> str:
    title = package_name
    return f"""{title}
{"=" * len(title)}

.. automodule:: {package_name}
    :members:
    :undoc-members:

.. toctree::
    :glob:

    *
"""


def write_to_file(content: str, path: str) -> None:
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        f.write(content)
    os.chmod(path, 0o666)


def make_rst(src_root, rst_root, clean=False, overwrite=False, package_prefix=""):
    """Creates/updates documentation in the form of rst files for modules and packages.

    Does not delete rst files for packages or modules that have been removed or renamed;
    these should be deleted by hand.

    This function should be executed from the project's top-level directory.

    :param src_root: path to the library base directory, typically "src/<library_name>"
    :param rst_root: path to the directory in which the rst files are to be placed
    :param clean: whether to completely clean the target directory beforehand, removing any existing rst files
    :param overwrite: whether to overwrite existing rst files; use with caution, as this discards
        all manual changes to the documentation files
    :param package_prefix: a prefix to prepend to each module name (for the case where src_root is not
        the base package), which, if non-empty, should end with a "."
    """
    rst_root = os.path.abspath(rst_root)

    if clean and os.path.isdir(rst_root):
        shutil.rmtree(rst_root)

    base_package_name = package_prefix + os.path.basename(src_root)
    write_to_file(index_template(base_package_name), os.path.join(rst_root, "index.rst"))

    for root, dirnames, filenames in os.walk(src_root):
        if os.path.basename(root).startswith("_"):
            continue
        base_package_relpath = os.path.relpath(root, start=src_root)
        base_package_qualname = package_prefix + os.path.relpath(
            root,
            start=os.path.dirname(src_root),
        ).replace(os.path.sep, ".")

        for dirname in dirnames:
            if not dirname.startswith("_"):
                package_qualname = f"{base_package_qualname}.{dirname}"
                package_rst_path = os.path.join(rst_root, base_package_relpath, f"{dirname}.rst")
                log.info(f"Writing package documentation to {package_rst_path}")
                write_to_file(package_template(package_qualname), package_rst_path)

        for filename in filenames:
            base_name, ext = os.path.splitext(filename)
            if ext == ".py" and not filename.startswith("_"):
                module_qualname = f"{base_package_qualname}.{base_name}"

                module_rst_path = os.path.join(rst_root, base_package_relpath, f"{base_name}.rst")
                if os.path.exists(module_rst_path) and not overwrite:
                    log.debug(f"{module_rst_path} already exists, skipping it")
                    continue

                log.info(f"Writing module documentation to {module_rst_path}")
                write_to_file(module_template(module_qualname), module_rst_path)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    docs_root = Path(__file__).parent
    make_rst(
        docs_root / ".." / "tianshou" / "highlevel",
        docs_root / "api" / "tianshou.highlevel",
        clean=True,
        package_prefix="tianshou.",
    )
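To illustrate what the generator emits, here is a quick standalone check of `module_template` (the function is reproduced from the script above; the module name used is just an example):

```python
# Reproduction of module_template from autogen_rst.py, to show its rst output.
def module_template(module_qualname: str) -> str:
    module_name = module_qualname.split(".")[-1]
    title = module_name.replace("_", r"\_")
    return f"""{title}
{"=" * len(title)}

.. automodule:: {module_qualname}
    :members:
    :undoc-members:
"""

rst = module_template("tianshou.highlevel.config")
# The title line is the bare module name, underlined with '=' of equal length,
# followed by an automodule directive for the fully qualified name.
lines = rst.splitlines()
assert lines[0] == "config"
assert lines[1] == "======"
assert ".. automodule:: tianshou.highlevel.config" in lines
```

The underline length must match the title length exactly, since Sphinx warns on mismatched section underlines.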
1 change: 1 addition & 0 deletions docs/index.rst
@@ -118,6 +118,7 @@ Tianshou is still under development, you can also check out the documents in sta
api/tianshou.trainer
api/tianshou.exploration
api/tianshou.utils
api/tianshou.highlevel/index


.. toctree::
Expand Down
8 changes: 8 additions & 0 deletions docs/spelling_wordlist.txt
@@ -180,3 +180,11 @@ params
inplace
deepcopy
Gaussian
stdout
parallelization
minibatch
minibatches
MLP
backpropagation
dataclass
superset
33 changes: 33 additions & 0 deletions examples/atari/atari_callbacks.py
@@ -0,0 +1,33 @@
from tianshou.highlevel.trainer import (
    TrainerEpochCallbackTest,
    TrainerEpochCallbackTrain,
    TrainingContext,
)
from tianshou.policy import DQNPolicy


class TestEpochCallbackDQNSetEps(TrainerEpochCallbackTest):
    def __init__(self, eps_test: float):
        self.eps_test = eps_test

    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy: DQNPolicy = context.policy
        policy.set_eps(self.eps_test)


class TrainEpochCallbackNatureDQNEpsLinearDecay(TrainerEpochCallbackTrain):
    def __init__(self, eps_train: float, eps_train_final: float):
        self.eps_train = eps_train
        self.eps_train_final = eps_train_final

    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy: DQNPolicy = context.policy
        logger = context.logger
        # Nature DQN setting: linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = self.eps_train - env_step / 1e6 * (self.eps_train - self.eps_train_final)
        else:
            eps = self.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})
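The linear decay schedule used in the train callback can be sanity-checked in isolation. A standalone sketch of the same formula, detached from the callback class (the default values mirror the example scripts below):

```python
def nature_dqn_eps(env_step: int, eps_train: float = 1.0, eps_train_final: float = 0.05) -> float:
    """Linear decay from eps_train to eps_train_final over the first 1M steps, then constant."""
    if env_step <= 1e6:
        return eps_train - env_step / 1e6 * (eps_train - eps_train_final)
    return eps_train_final

# At step 0 exploration is maximal; halfway through the decay window it is
# the midpoint of the two endpoints; past 1M steps it stays at the final value.
assert nature_dqn_eps(0) == 1.0
assert abs(nature_dqn_eps(500_000) - 0.525) < 1e-9
assert nature_dqn_eps(2_000_000) == 0.05
```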
105 changes: 105 additions & 0 deletions examples/atari/atari_dqn_hl.py
@@ -0,0 +1,105 @@
#!/usr/bin/env python3

import os

from examples.atari.atari_callbacks import (
    TestEpochCallbackDQNSetEps,
    TrainEpochCallbackNatureDQNEpsLinearDecay,
)
from examples.atari.atari_network import (
    IntermediateModuleFactoryAtariDQN,
    IntermediateModuleFactoryAtariDQNFeatures,
)
from examples.atari.atari_wrapper import AtariEnvFactory, AtariStopCallback
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.experiment import (
    DQNExperimentBuilder,
    ExperimentConfig,
)
from tianshou.highlevel.params.policy_params import DQNParams
from tianshou.highlevel.params.policy_wrapper import (
    PolicyWrapperFactoryIntrinsicCuriosity,
)
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag


def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: int = 0,
    eps_test: float = 0.005,
    eps_train: float = 1.0,
    eps_train_final: float = 0.05,
    buffer_size: int = 100000,
    lr: float = 0.0001,
    gamma: float = 0.99,
    n_step: int = 3,
    target_update_freq: int = 500,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 10,
    update_per_step: float = 0.1,
    batch_size: int = 32,
    training_num: int = 10,
    test_num: int = 10,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO support?
    icm_lr_scale: float = 0.0,
    icm_reward_scale: float = 0.01,
    icm_forward_loss_weight: float = 0.2,
):
    log_name = os.path.join(task, "dqn", str(experiment_config.seed), datetime_tag())

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        update_per_step=update_per_step,
        repeat_per_collect=None,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    env_factory = AtariEnvFactory(task, experiment_config.seed, frames_stack, scale=scale_obs)

    builder = (
        DQNExperimentBuilder(env_factory, experiment_config, sampling_config)
        .with_dqn_params(
            DQNParams(
                discount_factor=gamma,
                estimation_step=n_step,
                lr=lr,
                target_update_freq=target_update_freq,
            ),
        )
        .with_model_factory(IntermediateModuleFactoryAtariDQN())
        .with_trainer_epoch_callback_train(
            TrainEpochCallbackNatureDQNEpsLinearDecay(eps_train, eps_train_final),
        )
        .with_trainer_epoch_callback_test(TestEpochCallbackDQNSetEps(eps_test))
        .with_trainer_stop_callback(AtariStopCallback(task))
    )
    if icm_lr_scale > 0:
        builder.with_policy_wrapper_factory(
            PolicyWrapperFactoryIntrinsicCuriosity(
                feature_net_factory=IntermediateModuleFactoryAtariDQNFeatures(),
                hidden_sizes=[512],
                lr=lr,
                lr_scale=icm_lr_scale,
                reward_scale=icm_reward_scale,
                forward_loss_weight=icm_forward_loss_weight,
            ),
        )

    experiment = builder.build()
    experiment.run(log_name)


if __name__ == "__main__":
    logging.run_cli(main)

Review discussion on the PolicyWrapperFactoryIntrinsicCuriosity call:

Collaborator: I think it would be better to use kwargs explicitly in most methods that have more than, say, 3-4 parameters. This makes the logic easier to understand and prevents difficult-to-debug mistakes when the user accidentally switches the argument order. You could use (*, <args>) in the constructor of PolicyWrapperFactoryIntrinsicCuriosity and other places where it's appropriate to ensure that this is happening, similar to how I've done it for the policies in the last PR.

Collaborator: Also, how about using an alias type IntrinsicCuriosity for better readability?

Author (@opcode81, Oct 26, 2023): Agreed, forced kwargs can improve readability (and potentially avoid errors) here; added in commit da2194e. As far as the shorter aliases are concerned, I am still not entirely convinced they are needed. I would never use them personally, because they're just not easily discovered via auto-completion, and you lose the semantics. What do the others think?
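The forced-kwargs suggestion from the review thread (adopted in commit da2194e) boils down to a bare `*` marker in the signature. A minimal sketch with a hypothetical class and illustrative parameter names, not the actual tianshou implementation:

```python
class CuriosityConfigSketch:
    """Hypothetical config class demonstrating keyword-only parameters via the bare * marker."""

    def __init__(self, *, lr_scale: float, reward_scale: float, forward_loss_weight: float):
        self.lr_scale = lr_scale
        self.reward_scale = reward_scale
        self.forward_loss_weight = forward_loss_weight


# Positional calls now fail loudly instead of silently mixing up the scale parameters:
try:
    CuriosityConfigSketch(0.1, 0.01, 0.2)
except TypeError as e:
    print("rejected positional args:", e)

# Keyword calls remain valid, and the argument order no longer matters:
cfg = CuriosityConfigSketch(reward_scale=0.01, lr_scale=0.1, forward_loss_weight=0.2)
assert cfg.lr_scale == 0.1
```

The safety benefit is largest when several parameters share a type, as here, where all three are floats and a swapped pair would otherwise go unnoticed.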
97 changes: 97 additions & 0 deletions examples/atari/atari_iqn_hl.py
@@ -0,0 +1,97 @@
#!/usr/bin/env python3

import os
from collections.abc import Sequence

from examples.atari.atari_callbacks import (
    TestEpochCallbackDQNSetEps,
    TrainEpochCallbackNatureDQNEpsLinearDecay,
)
from examples.atari.atari_network import (
    IntermediateModuleFactoryAtariDQN,
)
from examples.atari.atari_wrapper import AtariEnvFactory, AtariStopCallback
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.experiment import (
    ExperimentConfig,
    IQNExperimentBuilder,
)
from tianshou.highlevel.params.policy_params import IQNParams
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag


def main(
    experiment_config: ExperimentConfig,
    task: str = "PongNoFrameskip-v4",
    scale_obs: int = 0,
    eps_test: float = 0.005,
    eps_train: float = 1.0,
    eps_train_final: float = 0.05,
    buffer_size: int = 100000,
    lr: float = 0.0001,
    gamma: float = 0.99,
    sample_size: int = 32,
    online_sample_size: int = 8,
    target_sample_size: int = 8,
    num_cosines: int = 64,
    hidden_sizes: Sequence[int] = (512,),
    n_step: int = 3,
    target_update_freq: int = 500,
    epoch: int = 100,
    step_per_epoch: int = 100000,
    step_per_collect: int = 10,
    update_per_step: float = 0.1,
    batch_size: int = 32,
    training_num: int = 10,
    test_num: int = 10,
    frames_stack: int = 4,
    save_buffer_name: str | None = None,  # TODO support?
):
    log_name = os.path.join(task, "iqn", str(experiment_config.seed), datetime_tag())

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
        buffer_size=buffer_size,
        step_per_collect=step_per_collect,
        update_per_step=update_per_step,
        repeat_per_collect=None,
        replay_buffer_stack_num=frames_stack,
        replay_buffer_ignore_obs_next=True,
        replay_buffer_save_only_last_obs=True,
    )

    env_factory = AtariEnvFactory(task, experiment_config.seed, frames_stack, scale=scale_obs)

    experiment = (
        IQNExperimentBuilder(env_factory, experiment_config, sampling_config)
        .with_iqn_params(
            IQNParams(
                discount_factor=gamma,
                estimation_step=n_step,
                lr=lr,
                sample_size=sample_size,
                online_sample_size=online_sample_size,
                target_update_freq=target_update_freq,
                target_sample_size=target_sample_size,
                hidden_sizes=hidden_sizes,
                num_cosines=num_cosines,
            ),
        )
        .with_preprocess_network_factory(IntermediateModuleFactoryAtariDQN(features_only=True))
        .with_trainer_epoch_callback_train(
            TrainEpochCallbackNatureDQNEpsLinearDecay(eps_train, eps_train_final),
        )
        .with_trainer_epoch_callback_test(TestEpochCallbackDQNSetEps(eps_test))
        .with_trainer_stop_callback(AtariStopCallback(task))
        .build()
    )
    experiment.run(log_name)


if __name__ == "__main__":
    logging.run_cli(main)