microsoft · you-n-g · May 17, 2021 · Feb 16, 2021 · Feb 17, 2021 · Feb 26, 2021
diff --git a/docs/advanced/task_managment.rst b/docs/advanced/task_managment.rst
@@ -0,0 +1,67 @@
+.. _task_managment:
+
+=================================
+Task Management
+=================================
+.. currentmodule:: qlib
+
+
+Introduction
+=============
+
+The `Workflow <../component/introduction.html>`_ part introduces how to run research workflow in a loosely-coupled way. But it can only execute one ``task`` when you use ``qrun``. To automatically generate and execute different tasks, the Task Management module provides a whole process including `Task Generating`_, `Task Storing`_, `Task Running`_ and `Task Collecting`_.
+With this module, users can run their ``task`` automatically over different periods, with different losses or even with different models.
+
+An example of the entire process is shown `here <https://github.com/microsoft/qlib/tree/main/examples/taskmanager/task_manager_rolling.py>`_.
+
+Task Generating
+===============
+A ``task`` consists of `Model`, `Dataset`, `Record` or anything added by users. 
+The specific task template can be viewed in 
+`Task Section <../component/workflow.html#task-section>`_.
+Even though the task template is fixed, users can use ``TaskGen`` to generate different ``task`` from the task template.
+
+Here is the base class of TaskGen:
+
+.. autoclass:: qlib.workflow.task.gen.TaskGen
+    :members:
+
+``Qlib`` provides a class `RollingGen <https://github.com/microsoft/qlib/tree/main/qlib/workflow/task/gen.py>`_ to generate a list of ``task`` of dataset in different date segments.
+This allows users to verify the effect of data from different periods on the model in one experiment.
+
+Task Storing
+===============
+In order to achieve higher efficiency and the possibility of cluster operation, ``Task Manager`` will store all tasks in `MongoDB <https://www.mongodb.com/>`_.
+Users **MUST** finish the configuration of `MongoDB <https://www.mongodb.com/>`_ when using this module.
+
+Users need to provide the MongoDB URL and the database name used for ``task`` storing like this.
+
+    .. code-block:: python
+
+        from qlib.config import C
+        C["mongo"] = {
+            "task_url" : "mongodb://localhost:27017/", # maybe you need to change it to your url
+            "task_db_name" : "rolling_db" # you can customize the database name
+        }
+
+The CRUD methods of ``task`` can be found in TaskManager. More methods can be seen on `GitHub <https://github.com/microsoft/qlib/tree/main/qlib/workflow/task/manage.py>`_.
+
+.. autoclass:: qlib.workflow.task.manage.TaskManager
+    :members:
+
+Task Running
+===============
+After generating and storing those ``task``, it's time to run the ``task`` in the *WAITING* status.
+``qlib`` provides a method to run those ``task`` in the task pool; however, users can also customize how tasks are executed.
+An easy way to get the ``task_func`` is using ``qlib.model.trainer.task_train`` directly.
+It will run the whole workflow defined by ``task``, which includes *Model*, *Dataset*, *Record*.
+
+.. autofunction:: qlib.workflow.task.manage.run_task
+
+Task Collecting
+===============
+To see the results of ``task`` after running, ``Qlib`` provides a task collector to collect the tasks by filter condition (optional).
+The collector will return a dict of filtered key (users defined by task config) and value (predict scores from ``pred.pkl``).
+
+.. autoclass:: qlib.workflow.task.collect.TaskCollector
+    :members:
diff --git a/examples/taskmanager/task_manager_rolling.ipynb b/examples/taskmanager/task_manager_rolling.ipynb
diff --git a/examples/taskmanager/task_manager_rolling.py b/examples/taskmanager/task_manager_rolling.py
@@ -0,0 +1,108 @@
+import qlib
+from qlib.config import REG_CN
+from qlib.workflow.task.gen import RollingGen, task_generator
+from qlib.workflow.task.manage import TaskManager
+from qlib.config import C
+
+data_handler_config = {  # kwargs of the Alpha158 handler referenced in dataset_config
+    "start_time": "2008-01-01",
+    "end_time": "2020-08-01",
+    "fit_start_time": "2008-01-01",
+    "fit_end_time": "2014-12-31",
+    "instruments": 'csi100',
+}
+
+dataset_config = {  # dataset part shared by both task templates below
+        "class": "DatasetH",
+        "module_path": "qlib.data.dataset",
+        "kwargs": {
+            "handler": {
+                "class": "Alpha158",
+                "module_path": "qlib.contrib.data.handler",
+                "kwargs": data_handler_config,
+            },
+            "segments": {
+                "train": ("2008-01-01", "2014-12-31"),
+                "valid": ("2015-01-01", "2016-12-31"),
+                "test": ("2017-01-01", "2020-08-01"),
+            },
+        },
+    }
+
+record_config = [  # record part shared by both task templates below
+    {
+        "class": "SignalRecord",
+        "module_path": "qlib.workflow.record_temp",
+    },
+    {
+        "class": "SigAnaRecord",
+        "module_path": "qlib.workflow.record_temp",
+    }
+]
+
+# task template that uses the LGBModel from qlib.contrib.model.gbdt
+task_lgb_config = {
+    "model": {
+        "class": "LGBModel",
+        "module_path": "qlib.contrib.model.gbdt",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+# task template that uses the XGBModel from qlib.contrib.model.xgboost
+task_xgboost_config = {
+    "model": {
+        "class": "XGBModel",
+        "module_path": "qlib.contrib.model.xgboost",
+    },
+    "dataset": dataset_config,
+    "record": record_config,
+}
+
+provider_uri = "~/.qlib/qlib_data/cn_data"  # path of the qlib data passed to qlib.init
+qlib.init(provider_uri=provider_uri, region=REG_CN)
+
+C["mongo"] = {
+    "task_url" : "mongodb://localhost:27017/", # change it to the URL of your own MongoDB if needed
+    "task_db_name" : "rolling_db"
+}
+
+exp_name = 'rolling_exp' # experiment name, will be used as the experiment in MLflow
+task_pool = 'rolling_task' # task pool name, will be used as the document in MongoDB
+
+tasks = task_generator(
+    task_xgboost_config, # positional template, so it gets the default task name
+    RollingGen(step=550,rtype=RollingGen.ROLL_SD), # generate different date segment
+    task_lgb=task_lgb_config # keyword template, so "task_lgb" is used as the task name
+)
+
+# Uncomment next two lines to see the generated tasks
+# from pprint import pprint
+# pprint(tasks)
+
+tm = TaskManager(task_pool=task_pool)
+tm.create_task(tasks) # all tasks will be saved to MongoDB
+
+from qlib.workflow.task.manage import run_task
+from qlib.workflow.task.collect import RollingCollector
+from qlib.model.trainer import task_train
+
+run_task(task_train, task_pool, experiment_name=exp_name) # every task of the pool is trained with "task_train"
+
+def get_task_key(task_config):
+    """Name a task by its "task_key" and the end date ('%Y-%m-%d') of its test segment."""
+    key = task_config["task_key"]
+    test_end = task_config["dataset"]["kwargs"]["segments"]["test"][1]
+    return key, test_end.strftime('%Y-%m-%d')
+
+def my_filter(task_config):
+    """Keep only the results of "task_lgb" whose test segment ends in 2019."""
+    name, end_date = get_task_key(task_config)
+    if name != "task_lgb":
+        return False
+    return end_date.startswith('2019')
+
+collector = RollingCollector(get_task_key, my_filter)
+pred_rolling = collector(exp_name) # collect from the experiment: tasks are named by "get_task_key" and filtered by "my_filter"
+print(pred_rolling)
diff --git a/qlib/__init__.py b/qlib/__init__.py
@@ -3,19 +3,21 @@
 
 
 __version__ = "0.6.3.99"
+__version__bak = __version__  # Backup of the original version; QlibConfig.reset_qlib_version restores __version__ from it
 
 
 import os
 import yaml
 import logging
 import platform
 import subprocess
+from pathlib import Path
+from .log import get_module_logger
 
 
 # init qlib
 def init(default_conf="client", **kwargs):
     from .config import C
-    from .log import get_module_logger
     from .data.cache import H
 
     H.clear()
@@ -48,7 +50,6 @@ def init(default_conf="client", **kwargs):
 
 
 def _mount_nfs_uri(C):
-    from .log import get_module_logger
 
     LOG = get_module_logger("mount nfs", level=logging.INFO)
 
@@ -151,3 +152,73 @@ def init_from_yaml_conf(conf_path, **kwargs):
     config.update(kwargs)
     default_conf = config.pop("default_conf", "client")
     init(default_conf, **config)
+
+
+def get_project_path(config_name="config.yaml") -> Path:
+    """
+    Locate the project path when the project is organized in the following pattern.
+    - Qlib is a sub folder in project path
+    - There is a file named `config_name` (`config.yaml` by default) in the project path.
+
+    For example:
+        If your project file system structure follows such a pattern
+
+            <project_path>/
+              - config.yaml
+              - ...some folders...
+                - qlib/
+
+        This function will return <project_path>
+
+        NOTE: link is not supported here.
+
+    The search starts from the location of this file and walks up to the root folder.
+
+    This method is often used when
+    - user wants to use a relative config path instead of hard-coding qlib config path in code
+
+    Raises
+    ------
+    FileNotFoundError:
+        If project path is not found
+    """
+    start = Path(__file__).absolute().resolve()
+    # `start` itself is probed first, then each of its ancestors up to the root
+    for candidate in (start, *start.parents):
+        if (candidate / config_name).exists():
+            return candidate
+    raise FileNotFoundError("We can't find the project path")
+
+
+def auto_init(**kwargs):
+    """
+    Init qlib automatically with the following priority
+    - Find the project configuration (see `get_project_path`) and init qlib with it
+        - The parsing process is affected by the `conf_type` of the configuration file
+    - Init qlib with the default config if no project configuration is found
+    `kwargs` are forwarded to the init call and override `qlib_cfg_update` entries.
+    """
+    try:
+        pp = get_project_path()
+    except FileNotFoundError:
+        init(**kwargs)
+    else:
+        conf_pp = pp / "config.yaml"
+        with conf_pp.open() as f:
+            conf = yaml.safe_load(f)
+
+        conf_type = conf.get("conf_type", "origin")
+        if conf_type == "origin":
+            # The type of config is just like original qlib config
+            init_from_yaml_conf(conf_pp, **kwargs)
+        elif conf_type == "ref":
+            # This config type will be more convenient in following scenario
+            # - There is a shared configure file and you don't want to edit it inplace.
+            # - The shared configure may be updated later and you don't want to copy it.
+            # - You have some customized config.
+            qlib_conf_path = conf["qlib_cfg"]
+            # `qlib_cfg_update` is optional; merging the dicts lets kwargs win on duplicated keys
+            qlib_conf_update = {**(conf.get("qlib_cfg_update") or {}), **kwargs}
+            init_from_yaml_conf(qlib_conf_path, **qlib_conf_update)
+        logger = get_module_logger("Initialization")
+        logger.info(f"Auto load project config: {conf_pp}")
diff --git a/qlib/config.py b/qlib/config.py
@@ -33,6 +33,9 @@ def __getattr__(self, attr):
 
         raise AttributeError(f"No such {attr} in self._config")
 
+    def get(self, key, default=None):  # dict-style lookup on the `_config` mapping; returns `default` if `key` is absent
+        return self.__dict__["_config"].get(key, default)
+
     def __setitem__(self, key, value):
         self.__dict__["_config"][key] = value
 
@@ -310,8 +313,22 @@ def register(self):
         # clean up experiment when python program ends
         experiment_exit_handler()
 
+        # Supporting user reset qlib version (useful when user want to connect to qlib server with old version)
+        self.reset_qlib_version()
+
         self._registered = True
 
+    def reset_qlib_version(self):
+        """Set `qlib.__version__` to the `qlib_reset_version` config value, or restore the backup."""
+        import qlib
+
+        custom_version = self.get("qlib_reset_version", None)
+        if custom_version is None:
+            # NOTE: inside a class body `qlib.__version__bak` would be name-mangled to
+            # `qlib._QlibConfig__version__bak`, so the backup is fetched with `getattr`.
+            custom_version = getattr(qlib, "__version__bak")
+        qlib.__version__ = custom_version
+
     @property
     def registered(self):
         return self._registered

diff --git a/qlib/model/ens/__init__.py b/qlib/model/ens/__init__.py
diff --git a/qlib/model/trainer.py b/qlib/model/trainer.py
@@ -6,14 +6,16 @@
 from qlib.workflow.record_temp import SignalRecord
 
 
-def task_train(task_config: dict, experiment_name):
+def task_train(task_config: dict, experiment_name: str):
     """
     task based training
 
     Parameters
     ----------
     task_config : dict
         A dict describes a task setting.
+    experiment_name: str
+        The name of experiment
     """
 
     # model initiaiton
@@ -27,16 +29,22 @@ def task_train(task_config: dict, experiment_name):
         model.fit(dataset)
         recorder = R.get_recorder()
         R.save_objects(**{"params.pkl": model})
+        R.save_objects(param=task_config)  # keep the original format and datatype
 
         # generate records: prediction, backtest, and analysis
-        for record in task_config["record"]:
+        records = task_config.get("record", [])
+        if isinstance(records, dict):  # prevent only one dict
+            records = [records]
+        for record in records:
             if record["class"] == SignalRecord.__name__:
                 srconf = {"model": model, "dataset": dataset, "recorder": recorder}
+                record.setdefault("kwargs", {})
                 record["kwargs"].update(srconf)
                 sr = init_instance_by_config(record)
                 sr.generate()
             else:
                 rconf = {"recorder": recorder}
+                record.setdefault("kwargs", {})
                 record["kwargs"].update(rconf)
                 ar = init_instance_by_config(record)
                 ar.generate()
diff --git a/qlib/workflow/task/__init__.py b/qlib/workflow/task/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""
+Task related workflow is implemented in this folder
+
+A typical task workflow
+
+| Step                  | Description                                    |
+|-----------------------+------------------------------------------------|
+| TaskGen               | Generating tasks.                              |
+| TaskManager(optional) | Manage generated tasks                         |
+| run task              | retrieve tasks from TaskManager and run tasks. |
+"""