Allow to show track details (#850)

With this commit we introduce a new `info` subcommand to Rally that allows users to print details about a track on the command line. This can be useful to see which challenges exist and which tasks are executed in each of them.
elastic · Dec 20, 2019 · 7ae1787 · 7ae1787
1 parent 15948fb
commit 7ae1787
Show file tree

Hide file tree

Showing 7 changed files with 172 additions and 2 deletions.
diff --git a/docs/adding_tracks.rst b/docs/adding_tracks.rst
@@ -202,6 +202,35 @@ The new track appears when you run ``esrally list tracks --track-path=~/rally-tr
     ----------  ----------------------------- -----------  ---------------  -----------------
     tutorial    Tutorial benchmark for Rally      11658903  N/A              1.4 GB
 
+You can also show details about your track with ``esrally info --track-path=~/rally-tracks/tutorial``::
+
+    dm@io:~ $ esrally info --track-path=~/rally-tracks/tutorial
+
+        ____        ____
+       / __ \____ _/ / /_  __
+      / /_/ / __ `/ / / / / /
+     / _, _/ /_/ / / / /_/ /
+    /_/ |_|\__,_/_/_/\__, /
+                    /____/
+
+    Showing details for track [tutorial]:
+
+    * Description: Tutorial benchmark for Rally
+    * Documents: 11,658,903
+    * Compressed Size: N/A
+    * Uncompressed Size: 1.4 GB
+
+
+    Schedule:
+    ----------
+
+    1. delete-index
+    2. create-index
+    3. cluster-health
+    4. bulk (8 clients)
+    5. force-merge
+    6. query-match-all (8 clients)
+
 Congratulations, you have created your first track! You can test it with ``esrally --distribution-version=6.0.0 --track-path=~/rally-tracks/tutorial``.
 
 .. _add_track_test_mode:

diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst
@@ -30,6 +30,48 @@ The ``list`` subcommand is used to list different configuration options:
 
 To list a specific configuration option, place it after the ``list`` subcommand. For example, ``esrally list pipelines`` will list all pipelines known to Rally.
 
+``info``
+~~~~~~~~
+
+The ``info`` subcommand prints details about a track. Example::
+
+    esrally info --track=noaa --challenge=append-no-conflicts
+
+This will print::
+
+    Showing details for track [noaa]:
+
+    * Description: Global daily weather measurements from NOAA
+    * Documents: 33,659,481
+    * Compressed Size: 947.3 MB
+    * Uncompressed Size: 9.0 GB
+
+    ================================================
+    Challenge [append-no-conflicts] (run by default)
+    ================================================
+
+    Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green and we want to ensure that we don't use the query cache. Document ids are unique so all index operations are append only. After that a couple of queries are run.
+
+    Schedule:
+    ----------
+
+    1. delete-index
+    2. create-index
+    3. check-cluster-health
+    4. index (8 clients)
+    5. refresh-after-index
+    6. force-merge
+    7. refresh-after-force-merge
+    8. range_field_big_range
+    9. range_field_small_range
+    10. range_field_conjunction_big_range_small_term_query
+    11. range_field_conjunction_small_range_small_term_query
+    12. range_field_conjunction_small_range_big_term_query
+    13. range_field_conjunction_big_range_big_term_query
+    14. range_field_disjunction_small_range_small_term_query
+    15. range_field_disjunction_big_range_small_term_query
+
+It is also possible to use task filters (e.g. ``--include-tasks``), to refer to a track via its path (``--track-path``), or to use a different track repository (``--track-repository``).
 
 ``compare``
 ~~~~~~~~~~~

diff --git a/esrally/rally.py b/esrally/rally.py
@@ -72,6 +72,41 @@ def positive_number(v):
         default=10,
     )
 
+    info_parser = subparsers.add_parser("info", help="Show info about a track")
+    info_track_source_group = info_parser.add_mutually_exclusive_group()
+    info_track_source_group.add_argument(
+        "--track-repository",
+        help="Define the repository from where Rally will load tracks (default: default).",
+        # argparse is smart enough to use this default only if the user did not use --track-path and also did not specify anything
+        default="default"
+    )
+    info_track_source_group.add_argument(
+        "--track-path",
+        help="Define the path to a track.")
+
+    info_parser.add_argument(
+        "--track",
+        help="Define the track to use. List possible tracks with `{} list tracks` (default: geonames).".format(PROGRAM_NAME)
+        # we set the default value later on because we need to determine whether the user has provided this value.
+        # default="geonames"
+    )
+    info_parser.add_argument(
+        "--track-params",
+        help="Define a comma-separated list of key:value pairs that are injected verbatim to the track as variables.",
+        default=""
+    )
+    info_parser.add_argument(
+        "--challenge",
+        help="Define the challenge to use. List possible challenges for tracks with `{} list tracks`.".format(PROGRAM_NAME)
+    )
+    info_task_filter_group = info_parser.add_mutually_exclusive_group()
+    info_task_filter_group.add_argument(
+        "--include-tasks",
+        help="Defines a comma-separated list of tasks to run. By default all tasks of a challenge are run.")
+    info_task_filter_group.add_argument(
+        "--exclude-tasks",
+        help="Defines a comma-separated list of tasks not to run. By default all tasks of a challenge are run.")
+
     generate_parser = subparsers.add_parser("generate", help="Generate artifacts")
     generate_parser.add_argument(
         "artifact",
@@ -488,7 +523,7 @@ def positive_number(v):
             default=False)
 
     for p in [parser, config_parser, list_parser, race_parser, compare_parser, download_parser, install_parser,
-              start_parser, stop_parser]:
+              start_parser, stop_parser, info_parser]:
         # This option is needed to support a separate configuration for the integration tests on the same machine
         p.add_argument(
             "--configuration-name",
@@ -657,6 +692,8 @@ def dispatch_sub_command(cfg, sub_command):
             race(cfg)
         elif sub_command == "generate":
             generate(cfg)
+        elif sub_command == "info":
+            track.track_info(cfg)
         else:
             raise exceptions.SystemSetupError("Unknown subcommand [%s]" % sub_command)
         return True

diff --git a/esrally/track/__init__.py b/esrally/track/__init__.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from .loader import list_tracks, load_track, load_track_plugins, track_repo, prepare_track, operation_parameters, set_absolute_data_path
+from .loader import list_tracks, track_info, load_track, load_track_plugins, track_repo, prepare_track, operation_parameters, set_absolute_data_path
 
 # expose the complete track API
 from .track import *
diff --git a/esrally/track/loader.py b/esrally/track/loader.py
@@ -81,6 +81,52 @@ def list_tracks(cfg):
     console.println(tabulate.tabulate(tabular_data=data, headers=headers))
 
 
+def track_info(cfg):
+    def format_task(t, indent="", num="", suffix=""):
+        msg = "{}{}{}".format(indent, num, str(t))
+        if t.clients > 1:
+            msg += " ({} clients)".format(t.clients)
+        msg += suffix
+        return msg
+
+    def challenge_info(c):
+        if not c.auto_generated:
+            msg = "Challenge [{}]".format(c.name)
+            if c.default:
+                msg += " (run by default)"
+            console.println(msg, underline="=", overline="=")
+            if c.description:
+                console.println("\n{}".format(c.description))
+
+        console.println("\nSchedule:", underline="-")
+        console.println("")
+        for num, task in enumerate(c.schedule, start=1):
+            if task.nested:
+                console.println(format_task(task, suffix=":", num="{}. ".format(num)))
+                for leaf_num, leaf_task in enumerate(task, start=1):
+                    console.println(format_task(leaf_task, indent="\t", num="{}.{} ".format(num, leaf_num)))
+            else:
+                console.println(format_task(task, num="{}. ".format(num)))
+
+    t = load_track(cfg)
+    console.println("Showing details for track [{}]:\n".format(t.name))
+    console.println("* Description: {}".format(t.description))
+    if t.number_of_documents:
+        console.println("* Documents: {}".format(convert.number_to_human_string(t.number_of_documents)))
+        console.println("* Compressed Size: {}".format(convert.bytes_to_human_string(t.compressed_size_in_bytes)))
+        console.println("* Uncompressed Size: {}".format(convert.bytes_to_human_string(t.uncompressed_size_in_bytes)))
+    console.println("")
+
+    challenge_name = cfg.opts("track", "challenge.name", mandatory=False)
+    if challenge_name:
+        challenge = t.find_challenge(challenge_name)
+        challenge_info(challenge)
+    else:
+        for challenge in t.challenges:
+            challenge_info(challenge)
+            console.println("")
+
+
 def load_track(cfg):
     """
 

diff --git a/esrally/track/track.py b/esrally/track/track.py
@@ -576,6 +576,7 @@ class Parallel:
     def __init__(self, tasks, clients=None):
         self.tasks = tasks
         self._clients = clients
+        self.nested = True
 
     @property
     def clients(self):
@@ -631,6 +632,7 @@ def __init__(self, name, operation, meta_data=None, warmup_iterations=None, iter
         self.completes_parent = completes_parent
         self.schedule = schedule
         self.params = params if params else {}
+        self.nested = False
 
     def matches(self, task_filter):
         return task_filter.matches(self)

diff --git a/integration-test.sh b/integration-test.sh
@@ -253,6 +253,18 @@ function test_list {
     esrally list telemetry --configuration-name="${cfg}"
 }
 
+function test_info {
+    local cfg
+    random_configuration cfg
+
+    info "test info [${cfg}]"
+    esrally info --configuration-name="${cfg}" --track=geonames --challenge=append-no-conflicts
+    info "test info can also use a track repository [${cfg}]"
+    esrally info --configuration-name="${cfg}" --track-repository=default --track=geonames
+    info "test info with task filter [${cfg}]"
+    esrally info --configuration-name="${cfg}" --track=geonames --challenge=append-no-conflicts --include-tasks="type:search"
+}
+
 function test_download {
     local cfg
     random_configuration cfg
@@ -564,6 +576,8 @@ function run_test {
     test_configure
     echo "**************************************** TESTING RALLY LIST COMMANDS *******************************************"
     test_list
+    echo "**************************************** TESTING RALLY INFO COMMAND ********************************************"
+    test_info
     echo "**************************************** TESTING RALLY FAILS WITH UNUSED TRACK-PARAMS **************************"
     test_distribution_fails_with_wrong_track_params
     echo "**************************************** TESTING RALLY DOWNLOAD COMMAND ***********************************"