From 96bac822614f5eac396cf6ca523191914b0560d6 Mon Sep 17 00:00:00 2001
From: pytorchbot <soumith+bot@pytorch.org>
Date: Thu, 10 Oct 2024 17:19:32 -0700
Subject: [PATCH] Improve ArmTester logging (#6080)

* Improve ArmTester logging (#5629)

Summary:
See commits for description

Pull Request resolved: https://github.com/pytorch/executorch/pull/5629

Reviewed By: digantdesai

Differential Revision: D63637272

Pulled By: mergennachin

fbshipit-source-id: c0025dcbe41b0e8c7c20252af4f31531d230712e
(cherry picked from commit 0d96f75194a39e3cbbe13f1983a5cc9a63130db8)

* Manually fix lint

---------

Co-authored-by: Erik Lundell <erik.lundell@arm.com>
Co-authored-by: Jack Zhang <dvorjackz@gmail.com>
---
 backends/arm/test/common.py                |   3 +
 backends/arm/test/misc/test_debug_feats.py | 120 +++++++++++--
 backends/arm/test/tester/arm_tester.py     | 191 +++++++++++++++++----
 docs/source/debug-backend-delegate.md      |   2 +-
 4 files changed, 272 insertions(+), 44 deletions(-)

diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index 0d50f1882d..614960b71b 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -4,9 +4,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
 import os
 import shutil
 import subprocess
+import sys
 import tempfile
 
 import pytest
@@ -37,6 +39,7 @@ def pytest_configure(config):
                 "Tests are run with --arm_run_corstone300 but corstone300 FVP is not installed."
             )
         _enabled_options.append("corstone300")
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
 
 
 def pytest_collection_modifyitems(config, items):
diff --git a/backends/arm/test/misc/test_debug_feats.py b/backends/arm/test/misc/test_debug_feats.py
index aa9703f9eb..c4f47daa08 100644
--- a/backends/arm/test/misc/test_debug_feats.py
+++ b/backends/arm/test/misc/test_debug_feats.py
@@ -6,6 +6,7 @@
 
 import logging
 import os
+import shutil
 import tempfile
 import unittest
 
@@ -126,8 +127,62 @@ def test_numerical_diff_prints(self):
             self.fail()
 
 
-class TestDumpOperatorsAndDtypes(unittest.TestCase):
-    def test_dump_ops_and_dtypes(self):
+def test_dump_ops_and_dtypes():
+    model = Linear(20, 30)
+    (
+        ArmTester(
+            model,
+            example_inputs=model.get_inputs(),
+            compile_spec=common.get_tosa_compile_spec(),
+        )
+        .quantize()
+        .dump_dtype_distribution()
+        .dump_operator_distribution()
+        .export()
+        .dump_dtype_distribution()
+        .dump_operator_distribution()
+        .to_edge()
+        .dump_dtype_distribution()
+        .dump_operator_distribution()
+        .partition()
+        .dump_dtype_distribution()
+        .dump_operator_distribution()
+    )
+    # Just test that there are no execptions.
+
+
+def test_dump_ops_and_dtypes_parseable():
+    model = Linear(20, 30)
+    (
+        ArmTester(
+            model,
+            example_inputs=model.get_inputs(),
+            compile_spec=common.get_tosa_compile_spec(),
+        )
+        .quantize()
+        .dump_dtype_distribution(print_table=False)
+        .dump_operator_distribution(print_table=False)
+        .export()
+        .dump_dtype_distribution(print_table=False)
+        .dump_operator_distribution(print_table=False)
+        .to_edge()
+        .dump_dtype_distribution(print_table=False)
+        .dump_operator_distribution(print_table=False)
+        .partition()
+        .dump_dtype_distribution(print_table=False)
+        .dump_operator_distribution(print_table=False)
+    )
+    # Just test that there are no execptions.
+
+
+class TestCollateTosaTests(unittest.TestCase):
+    """Tests the collation of TOSA tests through setting the environment variable TOSA_TESTCASE_BASE_PATH."""
+
+    def test_collate_tosa_BI_tests(self):
+        # Set the environment variable to trigger the collation of TOSA tests
+        os.environ["TOSA_TESTCASES_BASE_PATH"] = "test_collate_tosa_tests"
+        # Clear out the directory
+
         model = Linear(20, 30)
         (
             ArmTester(
@@ -136,16 +191,59 @@ def test_dump_ops_and_dtypes(self):
                 compile_spec=common.get_tosa_compile_spec(),
             )
             .quantize()
-            .dump_dtype_distribution()
-            .dump_operator_distribution()
             .export()
-            .dump_dtype_distribution()
-            .dump_operator_distribution()
             .to_edge()
-            .dump_dtype_distribution()
-            .dump_operator_distribution()
             .partition()
-            .dump_dtype_distribution()
-            .dump_operator_distribution()
+            .to_executorch()
+        )
+        # test that the output directory is created and contains the expected files
+        assert os.path.exists(
+            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests"
+        )
+        assert os.path.exists(
+            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/output_tag8.tosa"
+        )
+        assert os.path.exists(
+            "test_collate_tosa_tests/tosa-bi/TestCollateTosaTests/test_collate_tosa_BI_tests/desc_tag8.json"
+        )
+
+        os.environ.pop("TOSA_TESTCASES_BASE_PATH")
+        shutil.rmtree("test_collate_tosa_tests", ignore_errors=True)
+
+
+def test_dump_tosa_ops(caplog):
+    caplog.set_level(logging.INFO)
+    model = Linear(20, 30)
+    (
+        ArmTester(
+            model,
+            example_inputs=model.get_inputs(),
+            compile_spec=common.get_tosa_compile_spec(),
         )
-        # Just test that there are no execeptions.
+        .quantize()
+        .export()
+        .to_edge()
+        .partition()
+        .dump_operator_distribution()
+    )
+    assert "TOSA operators:" in caplog.text
+
+
+def test_fail_dump_tosa_ops(caplog):
+    caplog.set_level(logging.INFO)
+
+    class Add(torch.nn.Module):
+        def forward(self, x):
+            return x + x
+
+    model = Add()
+    compile_spec = common.get_u55_compile_spec()
+    (
+        ArmTester(model, example_inputs=(torch.ones(5),), compile_spec=compile_spec)
+        .quantize()
+        .export()
+        .to_edge()
+        .partition()
+        .dump_operator_distribution()
+    )
+    assert "Can not get operator distribution for Vela command stream." in caplog.text
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index 2fe8c07e7d..fa9dbf3e60 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -7,13 +7,14 @@
 
 from collections import Counter
 from pprint import pformat
-from typing import Any, List, Literal, Optional, Tuple, Union
+from typing import Any, Iterable, List, Literal, Optional, Tuple, Union
 
 import executorch.backends.xnnpack.test.tester.tester as tester
 
 import numpy as np
+import serializer.tosa_serializer as ts
 
-import torch
+import torch.fx
 
 from executorch.backends.arm.arm_backend import get_intermediate_path, is_permute_memory
 from executorch.backends.arm.arm_partitioner import ArmPartitioner
@@ -30,28 +31,36 @@
     dbg_tosa_fb_to_json,
     RunnerUtil,
 )
+from executorch.backends.arm.tosa_mapping import extract_tensor_meta
 
 from executorch.backends.xnnpack.test.tester import Tester
+from executorch.devtools.backend_debug import get_delegation_info
 from executorch.exir import EdgeCompileConfig
 from executorch.exir.backend.compile_spec_schema import CompileSpec
+
 from executorch.exir.lowered_backend_module import LoweredBackendModule
+from tabulate import tabulate
+from torch.export.graph_signature import ExportGraphSignature, InputSpec, OutputSpec
 from torch.fx import Graph
 
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
 
 
 class Partition(tester.Partition):
     def dump_artifact(self, path_to_dump: Optional[str]):
         super().dump_artifact(path_to_dump)
 
+        output = "Formated Graph Signature:\n"
+        output += _format_export_graph_signature(
+            self.artifact.exported_program().graph_signature
+        )
+
         def get_output_format(lowered_module) -> str | None:
             for spec in lowered_module.compile_specs:
                 if spec.key == "output_format":
                     return spec.value.decode()
             return None
 
-        output = ""
         for node in self.graph_module.graph.nodes:
             if node.op == "get_attr" and node.name.startswith("lowered_module_"):
                 lowered_module = getattr(self.graph_module, node.name)
@@ -258,7 +267,9 @@ def run_method_and_compare_outputs(
             reference_stage = self.stages[self.stage_name(InitialModel)]
             quantization_scale = None
 
-        print(f"Comparing Stage {test_stage} with Stage {reference_stage}")
+        logger.info(
+            f"Comparing Stage '{self.stage_name(test_stage)}' with Stage '{self.stage_name(reference_stage)}'"
+        )
         is_nhwc = is_permute_memory(self.compile_spec)
 
         # Loop inputs and compare reference stage with the compared stage.
@@ -284,7 +295,8 @@ def run_method_and_compare_outputs(
                 generated_input.shape if hasattr(generated_input, "shape") else (1,)
                 for generated_input in reference_input
             ]
-            print(f"Run {run_iteration} with input shapes: {input_shapes}")
+            input_shape_str = ", ".join([str(list(i)) for i in input_shapes])
+            logger.info(f"Run #{run_iteration}, input shapes: {input_shape_str}")
 
             reference_output = reference_stage.run_artifact(reference_input)
             test_output = tuple(test_stage.run_artifact(test_input))
@@ -320,30 +332,84 @@ def get_graph(self, stage: str | None = None) -> Graph:
         return graph
 
     def dump_operator_distribution(
-        self, path_to_dump: Optional[str] = None
-    ) -> ArmQuantizer:
-        """Dump a dictionary with {operator: operator count} for the operators in the
-        graph of the current stage.
+        self, path_to_dump: Optional[str] = None, print_table: bool = True
+    ):
+        """Dump the distribution of operators in the current stage.
+        In the partition stage, additional information is included such as the number of
+        delegates and the distribution of TOSA operators.
+        Set parameter print_table to False to dump in a parseable format.
+
 
         Returns self for daisy-chaining.
         """
-        graph = self.get_graph(self.cur)
-        op_dist = _get_operator_distribution(graph)
-        to_print = self.cur + " operators: " + _format_dict(op_dist) + "\n"
+        line = "#" * 10
+        to_print = f"{line} {self.cur.capitalize()} Operator Distribution {line}\n"
+
+        if self.cur == self.stage_name(tester.Partition) and print_table:
+            graph_module = self.get_artifact().exported_program().graph_module
+            if print_table:
+                delegation_info = get_delegation_info(graph_module)
+                op_dist = delegation_info.get_operator_delegation_dataframe()
+            else:
+                op_dist = dict(_get_operator_distribution(graph_module.graph))
+            to_print += _format_dict(op_dist, print_table)
+            to_print += "\n" + _get_tosa_operator_distribution(
+                graph_module, print_table
+            )
+            to_print += "\n"
+            to_print += delegation_info.get_summary()
+        else:
+            graph = self.get_graph(self.cur)
+            op_dist = dict(_get_operator_distribution(graph))
+            if print_table:
+                op_dist = {
+                    "Operator": list(op_dist),
+                    "Count": [op_dist[key] for key in op_dist],
+                }
+            to_print += _format_dict(op_dist, print_table) + "\n"
+
         _dump_str(to_print, path_to_dump)
+
         return self
 
     def dump_dtype_distribution(
-        self, path_to_dump: Optional[str] = None
-    ) -> ArmQuantizer:
-        """Dump a dictionary with {dtype: dtype count} for the dtypes of the nodes in the
-        graph of the current stage.
+        self, path_to_dump: Optional[str] = None, print_table: bool = True
+    ):
+        """Dump a the distributions of dtypes of nodes and placeholders in the current stage.
+        Set parameter print_table to False to dump in a parseable format.
 
         Returns self for daisy-chaining.
         """
+
+        line = "#" * 10
+        to_print = (
+            f"{line} {self.cur.capitalize()} Placeholder Dtype Distribution {line}\n"
+        )
+
         graph = self.get_graph(self.cur)
-        op_dist = _get_dtype_distribution(graph)
-        to_print = self.cur + " placeholder data types: " + _format_dict(op_dist) + "\n"
+        dtype_dist_placeholders, dtype_dirst_tensors = _get_dtype_distribution(graph)
+        all_dtypes = set(dtype_dist_placeholders.keys()) | set(
+            dtype_dirst_tensors.keys()
+        )
+        if print_table:
+            dtype_dist = {
+                "Dtype": all_dtypes,
+                "Placeholder Count": [
+                    (
+                        dtype_dist_placeholders[key]
+                        if key in dtype_dist_placeholders
+                        else 0
+                    )
+                    for key in all_dtypes
+                ],
+                "Tensor Count": [
+                    (dtype_dirst_tensors[key] if key in dtype_dirst_tensors else 0)
+                    for key in all_dtypes
+                ],
+            }
+        else:
+            dtype_dist = dict(dtype_dist_placeholders + dtype_dirst_tensors)
+        to_print += _format_dict(dtype_dist, print_table) + "\n"
         _dump_str(to_print, path_to_dump)
         return self
 
@@ -421,17 +487,20 @@ def _compare_outputs(
             raise e
 
 
-def _get_dtype_distribution(graph: Graph) -> dict:
-    """Counts the occurences of placeholder data types in a graph.
-    The result is a dict {'data type':'number of placeholders'}
+def _get_dtype_distribution(graph: Graph) -> tuple[dict, dict]:
+    """Counts the occurences of placeholder and call_function dtypes in a graph.
+    The result is a tuple of Counters (placeholder_distribution, call_function_distribution)
     """
-    return Counter(
-        [
-            node.meta["val"].dtype
-            for node in list(graph.nodes)
-            if node.op == "placeholder"
-        ]
-    )
+    placeholder_dtypes = []
+    call_function_dtypes = []
+    for node in graph.nodes:
+        if node.op == "placeholder":
+            placeholder_dtypes.append(str(node.meta["val"].dtype))
+        if node.op == "call_function":
+            if "val" in node.meta:
+                dtype, _, _ = extract_tensor_meta(node.meta)
+                call_function_dtypes.append(ts.DTypeNames[dtype])
+    return Counter(placeholder_dtypes), Counter(call_function_dtypes)
 
 
 def _get_operator_distribution(graph: Graph) -> dict[str, int]:
@@ -443,13 +512,71 @@ def _get_operator_distribution(graph: Graph) -> dict[str, int]:
     )
 
 
+def _format_export_graph_signature(signature: ExportGraphSignature) -> str:
+    def specs_dict(specs: list[InputSpec | OutputSpec], title: str):
+        _dict: dict[str, list] = {title: [], "arg": [], "kind": [], "target": []}
+        for i, spec in enumerate(specs):
+            _dict[title].append(i)
+            _dict["arg"].append(spec.arg)
+            _dict["kind"].append(spec.kind)
+            _dict["target"].append(spec.target if spec.target else "-")
+        return _dict
+
+    input_dict = specs_dict(signature.input_specs, "Inputs")
+    output_dict = specs_dict(signature.output_specs, "Outputs")
+
+    return f"{_format_dict(input_dict)}\n{_format_dict(output_dict)}"
+
+
+def _get_tosa_operator_distribution(
+    graph_module: torch.fx.GraphModule, print_table=False
+) -> str:
+    """Counts the occurences of operator names of all lowered modules containing
+    a TOSA flatbuffer.
+    The result is a string with the operator distribution or an error message.
+    """
+    op_list = []
+    id = 0
+    while lowered_module := getattr(graph_module, f"lowered_module_{id}", None):
+        for spec in lowered_module.compile_specs:
+            if spec.key != "output_format":
+                continue
+            if spec.value == b"tosa":
+                tosa_fb = lowered_module.processed_bytes
+                tosa_json = dbg_tosa_fb_to_json(tosa_fb)
+                for region in tosa_json["regions"]:
+                    for block in region["blocks"]:
+                        op_list.extend(
+                            [operator["op"] for operator in block["operators"]]
+                        )
+                break
+            elif spec.value == b"vela":
+                return "Can not get operator distribution for Vela command stream."
+            else:
+                return f"Unknown output format '{spec.value}'."
+        id += 1
+    if id == 0:
+        return "No delegate with name 'lowered_module_0 found in graph module."
+    op_dist = dict(Counter(op_list))
+    op_dist = {
+        "Operator": list(op_dist.keys()),
+        "Count": [item[1] for item in op_dist.items()],
+    }
+    return "TOSA operators:\n" + _format_dict(dict(op_dist), print_table)
+
+
 def _dump_str(to_print: str, path_to_dump: Optional[str] = None):
     if path_to_dump:
         with open(path_to_dump, "a") as fp:
             fp.write(to_print)
     else:
-        print(to_print)
+        logger.info(to_print)
 
 
-def _format_dict(to_print: dict) -> str:
-    return pformat(to_print, compact=True, indent=1)
+def _format_dict(to_print: dict, print_table: bool = True) -> str:
+    if isinstance(list(to_print.items())[0], Iterable) and print_table:
+        return tabulate(
+            to_print, headers="keys", tablefmt="fancy_grid", maxcolwidths=35
+        )
+    else:
+        return pformat(to_print, compact=True, indent=1)
diff --git a/docs/source/debug-backend-delegate.md b/docs/source/debug-backend-delegate.md
index 17e4afe82a..68914aaed9 100644
--- a/docs/source/debug-backend-delegate.md
+++ b/docs/source/debug-backend-delegate.md
@@ -6,7 +6,7 @@ We provide a list of util functions to give users insights on what happened to t
 The `get_delegation_info()` method provides a summary of what happened to the model after the `to_backend()` call:
 
 ```python
-from executorch.exir.backend.utils import get_delegation_info
+from executorch.devtools.backend_debug import get_delegation_info
 from tabulate import tabulate
 
 # ... After call to to_backend(), but before to_executorch()