Skip to content

Commit

Permalink
[OpPerf] Fixed native output ordering, added warmup & runs command line args (apache#17571)
Browse files Browse the repository at this point in the history

* Fixed ordering, added warmup & runs to argparse and individual benchmark function calls

* Dropped unused ChainMap

* Added newline for consistency with previous changes

* Adjusted markdown output ordering
  • Loading branch information
connorgoggins authored and anirudh2290 committed May 29, 2020
1 parent 453bb5c commit 128d012
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 31 deletions.
55 changes: 35 additions & 20 deletions benchmark/opperf/opperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
get_current_runtime_features


def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native'):
def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
"""Run all the MXNet operators (NDArray) benchmarks.
Returns
Expand All @@ -62,61 +62,61 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='n
# *************************MXNET TENSOR OPERATOR BENCHMARKS*****************************

# Run all Unary operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Binary Broadcast, element_wise, and miscellaneous operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_binary_broadcast_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler))
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
mxnet_operator_benchmark_results.append(run_mx_binary_element_wise_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler))
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

mxnet_operator_benchmark_results.append(run_mx_binary_misc_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler))
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all GEMM operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_gemm_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler))
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Random sampling operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Reduction operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Sorting and Searching operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Array Rearrange operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Indexing routines benchmarks with default input values
mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# ************************ MXNET NN OPERATOR BENCHMARKS ****************************

# Run all basic NN operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Activation operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Pooling operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Convolution operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Optimizer operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Transpose Convolution operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all NN loss operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# Run all Linear Algebra operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler))
mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))

# ****************************** PREPARE FINAL RESULTS ********************************
final_benchmark_result_map = merge_map_list(mxnet_operator_benchmark_results)
Expand Down Expand Up @@ -159,6 +159,14 @@ def main():
'time module.'
'Valid Inputs - native, python')

parser.add_argument('-w', '--warmup', type=int, default=25,
help='Number of times to run for warmup.'
'Valid Inputs - positive integers')

parser.add_argument('-r', '--runs', type=int, default=100,
help='Number of runs to capture benchmark results.'
'Valid Inputs - positive integers')

args = parser.parse_args()
logging.info("Running MXNet operator benchmarks with the following options: {args}".format(args=args))
assert not os.path.isfile(args.output_file),\
Expand All @@ -168,7 +176,14 @@ def main():
ctx = _parse_mxnet_context(args.ctx)
dtype = args.dtype
profiler = args.profiler
final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler)
warmup = args.warmup
runs = args.runs
benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs)

# Sort benchmark results alphabetically by op name
final_benchmark_results = dict()
for key in sorted(benchmark_results.keys()):
final_benchmark_results[key] = benchmark_results[key]

# 3. PREPARE OUTPUTS
run_time_features = get_current_runtime_features()
Expand Down
4 changes: 2 additions & 2 deletions benchmark/opperf/utils/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ def _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, ar
_, profiler_output = benchmark_helper_func(op, runs, [], **kwargs)

# Add inputs used for profiling this operator into result
profiler_output["inputs"] = inputs[idx]
profiler_output = merge_map_list([{"inputs": inputs[idx]}] + [profiler_output])
op_benchmark_result[op.__name__].append(profiler_output)
else:
for idx, (args, kwargs) in enumerate(zip(args_list, kwargs_list)):
_, profiler_output = benchmark_helper_func(op, runs, args, **kwargs)

# Add inputs used for profiling this operator into result
profiler_output["inputs"] = inputs[idx]
profiler_output = merge_map_list([{"inputs": inputs[idx]}] + [profiler_output])
op_benchmark_result[op.__name__].append(profiler_output)
logging.info("Complete Benchmark - {name}".format(name=op.__name__))
return op_benchmark_result
Expand Down
19 changes: 12 additions & 7 deletions benchmark/opperf/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import json
from operator import itemgetter

from collections import ChainMap

import logging
logging.basicConfig(level=logging.INFO)

Expand All @@ -41,7 +39,14 @@ def merge_map_list(map_list):
map where all individual maps in the into map_list are merged
"""
return dict(ChainMap(*map_list))
# Preserve order of underlying maps and keys when converting to a single map
final_map = dict()

for current_map in map_list:
for key in current_map:
final_map[key] = current_map[key]

return final_map


def save_to_file(inp_dict, out_filepath, out_format='json', runtime_features=None, profiler='native'):
Expand All @@ -65,7 +70,7 @@ def save_to_file(inp_dict, out_filepath, out_format='json', runtime_features=Non
if out_format == 'json':
# Save as JSON
with open(out_filepath, "w") as result_file:
json.dump(inp_dict, result_file, indent=4, sort_keys=True)
json.dump(inp_dict, result_file, indent=4, sort_keys=False)
elif out_format == 'md':
# Save as md
with open(out_filepath, "w") as result_file:
Expand Down Expand Up @@ -122,7 +127,7 @@ def _prepare_op_benchmark_result(op, op_bench_result, profiler):
result = ""
if profiler == "native":
result = "| {} | {} | {} | {} | {} |".format(operator_name,
avg_forward_time, avg_backward_time, max_mem_usage, inputs)
inputs, max_mem_usage, avg_forward_time, avg_backward_time)
elif profiler == "python":
result = "| {} | {} | {} | {} | {} | {} |".format(operator_name, avg_time, p50_time, p90_time, p99_time, inputs)
return result
Expand All @@ -139,8 +144,8 @@ def _prepare_markdown(results, runtime_features=None, profiler='native'):
results_markdown.append("# Benchmark Results")
if profiler == 'native':
results_markdown.append(
"| Operator | Avg Forward Time (ms) | Avg. Backward Time (ms) | Max Mem Usage (Storage) (Bytes)"
" | Inputs |")
"| Operator | Inputs | Max Mem Usage (Storage) (Bytes) | Avg Forward Time (ms)"
" | Avg. Backward Time (ms) |")
results_markdown.append("| :---: | :---: | :---: | :---: | :---: |")
elif profiler == 'python':
results_markdown.append(
Expand Down
14 changes: 12 additions & 2 deletions benchmark/opperf/utils/profiler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,24 @@ def _get_operator_profile(operator_name, operator_profile_results):
else:
op_name = operator_name

# Variables to store forward/backward performance results
forward_res, backward_res = None, None

for line in operator_profile_results:
if op_name in line or op_name[:3] + " " in line:
operation = line.split()[0]
operation_avg_time = float(line.split()[-1])
if "_backward" in operation:
operator_profile["avg_time_backward_" + operator_name] = operation_avg_time
backward_res = operation_avg_time
else:
operator_profile["avg_time_forward_" + operator_name] = operation_avg_time
forward_res = operation_avg_time

# Add forward and backward performance results to the dict in the correct order
if forward_res:
operator_profile["avg_time_forward_" + operator_name] = forward_res

if backward_res:
operator_profile["avg_time_backward_" + operator_name] = backward_res

return operator_profile

Expand Down

0 comments on commit 128d012

Please sign in to comment.