diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 4d227ba3baec4a..ed38ff201e7924 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -11,13 +11,14 @@ import argparse import collections from datetime import date +import enum import itertools import json import os from pathlib import Path import re import sys -from typing import TextIO, TypeAlias +from typing import Callable, TextIO, TypeAlias OpcodeStats: TypeAlias = dict[str, dict[str, int]] @@ -25,6 +26,7 @@ Defines: TypeAlias = dict[int, list[str]] Rows: TypeAlias = list[tuple] Columns: TypeAlias = tuple[str, ...] +RowCalculator: TypeAlias = Callable[["Stats"], Rows] if os.name == "nt": @@ -44,6 +46,8 @@ def pretty(name: str) -> str: class Stats: + _data: dict + def __init__(self, input: Path): super().__init__() @@ -57,7 +61,7 @@ def __init__(self, input: Path): self._data["_defines"] = {int(k): v for k, v in self["_defines"].items()} elif input.is_dir(): - stats = collections.Counter() + stats = collections.Counter[str]() for filename in input.iterdir(): with open(filename) as fd: @@ -177,25 +181,14 @@ def markdown(self) -> str: return format(self, ",d") -class CountPer: - def __init__(self, num: int, den: int): +class Ratio: + def __init__(self, num: int, den: int, percentage: bool = True): self.num = num self.den = den + self.percentage = percentage if den == 0 and num != 0: raise ValueError("Invalid denominator") - def markdown(self) -> str: - if self.den == 0: - return "0" - else: - return f"{int(self.num / self.den):,d}" - - -class Ratio: - def __init__(self, num: int, den: int): - self.num = num - self.den = den - def __float__(self): if self.den == 0: return 0.0 @@ -205,8 +198,10 @@ def __float__(self): def markdown(self) -> str: if self.den == 0: return "" - else: + elif self.percentage: return f"{self.num / self.den:,.01%}" + else: + return f"{self.num / self.den:,.02f}" class DiffRatio(Ratio): @@ -217,32 +212,63 @@ def 
__init__(self, base: int | str, head: int | str): super().__init__(head - base, base) -def sort_by_last_column(rows: Rows): - rows.sort(key=lambda row: abs(float(row[-1])), reverse=True) - - -def dont_sort(rows: Rows): - pass +class JoinMode(enum.Enum): + # Join using the first column as a key + SIMPLE = 0 + # Join using the first column as a key, and indicate the change in the + # second column of each input table as a new column + CHANGE = 1 + # Join using the first column as a key, indicating the change in the second + # column of each input table as a new column, and omit all other columns + CHANGE_ONE_COLUMN = 2 class Table: - columns: Columns - sort_by_last_column: bool = False + """ + A Table defines how to convert a set of Stats into a specific set of rows + displaying some aspect of the data. + """ - def calculate_rows(self, stats: Stats) -> Rows: - raise NotImplementedError() + def __init__( + self, + column_names: Columns, + calc_rows: RowCalculator, + join_mode: JoinMode = JoinMode.SIMPLE, + ): + self.columns = column_names + self.calc_rows = calc_rows + self.join_mode = join_mode def join_row(self, key: str, row_a: tuple, row_b: tuple) -> tuple: - return (key, *row_a, *row_b) + match self.join_mode: + case JoinMode.SIMPLE: + return (key, *row_a, *row_b) + case JoinMode.CHANGE: + return (key, *row_a, *row_b, DiffRatio(row_a[0], row_b[0])) + case JoinMode.CHANGE_ONE_COLUMN: + return (key, row_a[0], row_b[0], DiffRatio(row_a[0], row_b[0])) def join_columns(self, columns: Columns) -> Columns: - return ( - columns[0], - *("Base " + x for x in columns[1:]), - *("Head " + x for x in columns[1:]), - ) - - sort_joined_rows = staticmethod(dont_sort) + match self.join_mode: + case JoinMode.SIMPLE: + return ( + columns[0], + *("Base " + x for x in columns[1:]), + *("Head " + x for x in columns[1:]), + ) + case JoinMode.CHANGE: + return ( + columns[0], + *("Base " + x for x in columns[1:]), + *("Head " + x for x in columns[1:]), + ) + ("Change:",) + case 
JoinMode.CHANGE_ONE_COLUMN: + return ( + columns[0], + "Base " + columns[1], + "Head " + columns[1], + "Change:", + ) def join_tables(self, rows_a: Rows, rows_b: Rows) -> tuple[Columns, Rows]: ncols = len(self.columns) @@ -261,105 +287,58 @@ def join_tables(self, rows_a: Rows, rows_b: Rows) -> tuple[Columns, Rows]: self.join_row(k, data_a.get(k, default), data_b.get(k, default)) for k in keys ] - if self.sort_by_last_column: + if self.join_mode in (JoinMode.CHANGE, JoinMode.CHANGE_ONE_COLUMN): rows.sort(key=lambda row: abs(float(row[-1])), reverse=True) columns = self.join_columns(self.columns) return columns, rows - def get_rows( + def get_table( self, base_stats: Stats, head_stats: Stats | None = None ) -> tuple[Columns, Rows]: if head_stats is None: - rows = self.calculate_rows(base_stats) + rows = self.calc_rows(base_stats) return self.columns, rows else: - rows_a = self.calculate_rows(base_stats) - rows_b = self.calculate_rows(head_stats) + rows_a = self.calc_rows(base_stats) + rows_b = self.calc_rows(head_stats) cols, rows = self.join_tables(rows_a, rows_b) return cols, rows - def output_markdown( - self, - out: TextIO, - base_stats: Stats, - head_stats: Stats | None = None, - level: int = 2, - ) -> None: - header, rows = self.get_rows(base_stats, head_stats) - if len(rows) == 0: - return - def to_markdown(x): - if hasattr(x, "markdown"): - return x.markdown() - elif isinstance(x, str): - return x - elif x is None: - return "" - else: - raise TypeError(f"Can't convert {x} to markdown") - - width = len(header) - header_line = "|" - under_line = "|" - for item in header: - under = "---" - if item.endswith(":"): - item = item[:-1] - under += ":" - header_line += item + " | " - under_line += under + "|" - print(header_line, file=out) - print(under_line, file=out) - for row in rows: - if width is not None and len(row) != width: - raise ValueError("Wrong number of elements in row '" + str(row) + "'") - print("|", " | ".join(to_markdown(i) for i in row), "|", 
file=out) - print(file=out) - - -class FixedTable(Table): - def __init__(self, columns: Columns, rows: Rows): - self.columns = columns - self.rows = rows - - def get_rows(self, *args) -> tuple[Columns, Rows]: - return self.columns, self.rows - - -class SimpleChangeTable(Table): +class Section: """ - Base class of tables where the comparison table has an extra column "Change" - computed from the change of the second column of the base and head. Sorted - by the "Change" column. + A Section defines a section of the output document. """ - def join_row(self, key: str, base_data: tuple, head_data: tuple) -> tuple: - return (key, *base_data, *head_data, DiffRatio(base_data[0], head_data[0])) - - def join_columns(self, columns: Columns) -> Columns: - return super().join_columns(columns) + ("Change:",) - - sort_joined_rows = staticmethod(sort_by_last_column) - - -class ExecutionCountTable(Table): - columns = ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:") - - def __init__(self, prefix: str): - self.prefix = prefix + def __init__( + self, + title: str = "", + summary: str = "", + part_iter=None, + comparative: bool = True, + ): + self.title = title + if not summary: + self.summary = title.lower() + else: + self.summary = summary + if part_iter is None: + part_iter = [] + if isinstance(part_iter, list): - def join_row(self, key: str, row_a: tuple, row_b: tuple) -> tuple: - return (key, row_a[0], row_b[0], DiffRatio(row_a[0], row_b[0])) + def iter_parts(base_stats: Stats, head_stats: Stats | None): + yield from part_iter - def join_columns(self, columns: Columns) -> Columns: - return ("Name", "Base Count:", "Head Count:", "Change") + self.part_iter = iter_parts + else: + self.part_iter = part_iter + self.comparative = comparative - sort_joined_rows = staticmethod(sort_by_last_column) - def calculate_rows(self, stats: Stats) -> Rows: - opcode_stats = stats.get_opcode_stats(self.prefix) +def calc_execution_count_table(prefix: str) -> RowCalculator: + def calc(stats: 
Stats) -> Rows: + opcode_stats = stats.get_opcode_stats(prefix) total = 0 counts = [] for name, opcode_stat in opcode_stats.items(): @@ -390,11 +369,25 @@ def calculate_rows(self, stats: Stats) -> Rows: ) return rows + return calc -class PairCountTable(Table): - columns = ("Pair", "Count:", "Self:", "Cumulative:") - def calculate_rows(self, stats: Stats) -> Rows: +def execution_count_section() -> Section: + return Section( + "Execution counts", + "execution counts for all instructions", + [ + Table( + ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), + calc_execution_count_table("opcode"), + join_mode=JoinMode.CHANGE_ONE_COLUMN, + ) + ], + ) + + +def pair_count_section() -> Section: + def calc_pair_count_table(stats: Stats) -> Rows: pair_counts = stats.get_pair_counts("opcode") total = stats.get_total("opcode") @@ -413,68 +406,87 @@ def calculate_rows(self, stats: Stats) -> Rows: return rows + return Section( + "Pair counts", + "Pair counts for top 100 pairs", + [ + Table( + ("Pair", "Count:", "Self:", "Cumulative:"), + calc_pair_count_table, + ) + ], + comparative=False, + ) -def iter_pre_succ_pairs_tables(base_stats: Stats, head_stats: Stats | None = None): - assert head_stats is None - opcode_stats = base_stats.get_opcode_stats("opcode") - pair_counts = base_stats.get_pair_counts("opcode") +def pre_succ_pairs_section() -> Section: + def iter_pre_succ_pairs_tables(base_stats: Stats, head_stats: Stats | None = None): + assert head_stats is None - predecessors: dict[str, collections.Counter] = collections.defaultdict( - collections.Counter - ) - successors: dict[str, collections.Counter] = collections.defaultdict( - collections.Counter - ) - total_predecessors: collections.Counter = collections.Counter() - total_successors: collections.Counter = collections.Counter() - for count, (first, second) in pair_counts: - if count: - predecessors[second][first] = count - successors[first][second] = count - total_predecessors[second] += count - 
total_successors[first] += count - - for name in opcode_stats.keys(): - total1 = total_predecessors[name] - total2 = total_successors[name] - if total1 == 0 and total2 == 0: - continue - pred_rows: Rows = [] - succ_rows: Rows = [] - if total1: - pred_rows = [ - (pred, Count(count), Ratio(count, total1)) - for (pred, count) in predecessors[name].most_common(5) - ] - if total2: - succ_rows = [ - (succ, Count(count), Ratio(count, total2)) - for (succ, count) in successors[name].most_common(5) - ] + opcode_stats = base_stats.get_opcode_stats("opcode") + pair_counts = base_stats.get_pair_counts("opcode") - yield Section( - name, - f"Successors and predecessors for {name}", - [ - FixedTable(("Predecessors", "Count:", "Percentage:"), pred_rows), - FixedTable(("Successors", "Count:", "Percentage:"), succ_rows), - ], + predecessors: dict[str, collections.Counter] = collections.defaultdict( + collections.Counter ) + successors: dict[str, collections.Counter] = collections.defaultdict( + collections.Counter + ) + total_predecessors: collections.Counter = collections.Counter() + total_successors: collections.Counter = collections.Counter() + for count, (first, second) in pair_counts: + if count: + predecessors[second][first] = count + successors[first][second] = count + total_predecessors[second] += count + total_successors[first] += count + + for name in opcode_stats.keys(): + total1 = total_predecessors[name] + total2 = total_successors[name] + if total1 == 0 and total2 == 0: + continue + pred_rows: Rows = [] + succ_rows: Rows = [] + if total1: + pred_rows = [ + (pred, Count(count), Ratio(count, total1)) + for (pred, count) in predecessors[name].most_common(5) + ] + if total2: + succ_rows = [ + (succ, Count(count), Ratio(count, total2)) + for (succ, count) in successors[name].most_common(5) + ] + yield Section( + name, + f"Successors and predecessors for {name}", + [ + Table( + ("Predecessors", "Count:", "Percentage:"), + lambda *_: pred_rows, + ), + Table( + ("Successors", 
"Count:", "Percentage:"), + lambda *_: succ_rows, + ), + ], + ) -def iter_specialization_tables(base_stats: Stats, head_stats: Stats | None = None): - class SpecializationTable(SimpleChangeTable): - columns = ("Kind", "Count:", "Ratio:") - - def __init__(self, name: str): - self.name = name + return Section( + "Predecessor/Successor Pairs", + "Top 5 predecessors and successors of each opcode", + iter_pre_succ_pairs_tables, + comparative=False, + ) - sort_joined_rows = staticmethod(dont_sort) - def calculate_rows(self, stats: Stats) -> Rows: +def specialization_section() -> Section: + def calc_specialization_table(name: str) -> RowCalculator: + def calc(stats: Stats) -> Rows: opcode_stats = stats.get_opcode_stats("opcode") - family_stats = opcode_stats[self.name] + family_stats = opcode_stats[name] total = sum(family_stats.get(kind, 0) for kind in TOTAL) if total == 0: return [] @@ -502,17 +514,12 @@ def calculate_rows(self, stats: Stats) -> Rows: ) return rows - class SpecializationSuccessFailureTable(SimpleChangeTable): - columns = ("", "Count:", "Ratio:") - - def __init__(self, name: str): - self.name = name - - sort_joined_rows = staticmethod(dont_sort) + return calc - def calculate_rows(self, stats: Stats) -> Rows: + def calc_specialization_success_failure_table(name: str) -> RowCalculator: + def calc(stats: Stats) -> Rows: opcode_stats = stats.get_opcode_stats("opcode") - family_stats = opcode_stats[self.name] + family_stats = opcode_stats[name] total_attempts = 0 for key in ("specialization.success", "specialization.failure"): total_attempts += family_stats.get(key, 0) @@ -525,13 +532,10 @@ def calculate_rows(self, stats: Stats) -> Rows: rows.append((label, Count(val), Ratio(val, total_attempts))) return rows - class SpecializationFailureKindTable(SimpleChangeTable): - columns = ("Failure kind", "Count:", "Ratio:") + return calc - def __init__(self, name: str): - self.name = name - - def calculate_rows(self, stats: Stats) -> Rows: + def 
calc_specialization_failure_kind_table(name: str) -> RowCalculator: + def calc(stats: Stats) -> Rows: def kind_to_text(kind: int, defines: Defines, opname: str): if kind <= 8: return pretty(defines[kind][0]) @@ -550,7 +554,7 @@ def kind_to_text(kind: int, defines: Defines, opname: str): defines = stats.defines opcode_stats = stats.get_opcode_stats("opcode") - family_stats = opcode_stats[self.name] + family_stats = opcode_stats[name] total_failures = family_stats.get("specialization.failure", 0) failure_kinds = [0] * 40 for key in family_stats: @@ -566,137 +570,174 @@ def kind_to_text(kind: int, defines: Defines, opname: str): continue rows.append( ( - kind_to_text(index, defines, self.name), + kind_to_text(index, defines, name), Count(value), Ratio(value, total_failures), ) ) return rows - opcode_base_stats = base_stats.get_opcode_stats("opcode") - names = opcode_base_stats.keys() - if head_stats is not None: - opcode_head_stats = head_stats.get_opcode_stats("opcode") - names &= opcode_head_stats.keys() # type: ignore - else: - opcode_head_stats = {} - - for name in sorted(names): - if "specializable" not in opcode_base_stats.get(name, {}): - continue - total = sum( - stats.get(name, {}).get(kind, 0) - for kind in TOTAL - for stats in (opcode_base_stats, opcode_head_stats) - ) - if total == 0: - continue - yield Section( - name, - f"specialization stats for {name} family", - [ - SpecializationTable(name), - SpecializationSuccessFailureTable(name), - SpecializationFailureKindTable(name), - ], - ) - + return calc -def iter_specialization_effectiveness_tables( - base_stats: Stats, head_stats: Stats | None = None -): - class SpecializationEffectivenessTable(SimpleChangeTable): - columns = ("Instructions", "Count:", "Ratio:") - - sort_joined_rows = staticmethod(dont_sort) + def iter_specialization_tables(base_stats: Stats, head_stats: Stats | None = None): + opcode_base_stats = base_stats.get_opcode_stats("opcode") + names = opcode_base_stats.keys() + if head_stats is 
not None: + opcode_head_stats = head_stats.get_opcode_stats("opcode") + names &= opcode_head_stats.keys() # type: ignore + else: + opcode_head_stats = {} - def calculate_rows(self, stats: Stats) -> Rows: - opcode_stats = stats.get_opcode_stats("opcode") - total = stats.get_total("opcode") - specialized_instructions = stats.specialized_instructions - - basic = 0 - specialized = 0 - not_specialized = 0 - for name, opcode_stat in opcode_stats.items(): - if "execution_count" not in opcode_stat: - continue - count = opcode_stat["execution_count"] - if "specializable" in opcode_stat: - not_specialized += count - elif name in specialized_instructions: - miss = opcode_stat.get("specialization.miss", 0) - not_specialized += miss - specialized += count - miss - else: - basic += count + for name in sorted(names): + if "specializable" not in opcode_base_stats.get(name, {}): + continue + total = sum( + stats.get(name, {}).get(kind, 0) + for kind in TOTAL + for stats in (opcode_base_stats, opcode_head_stats) + ) + if total == 0: + continue + yield Section( + name, + f"specialization stats for {name} family", + [ + Table( + ("Kind", "Count:", "Ratio:"), + calc_specialization_table(name), + JoinMode.CHANGE, + ), + Table( + ("", "Count:", "Ratio:"), + calc_specialization_success_failure_table(name), + JoinMode.CHANGE, + ), + Table( + ("Failure kind", "Count:", "Ratio:"), + calc_specialization_failure_kind_table(name), + JoinMode.CHANGE, + ), + ], + ) - return [ - ("Basic", Count(basic), Ratio(basic, total)), - ( - "Not specialized", - Count(not_specialized), - Ratio(not_specialized, total), - ), - ("Specialized", Count(specialized), Ratio(specialized, total)), - ] + return Section( + "Specialization stats", + "specialization stats by family", + iter_specialization_tables, + ) - class DeferredByInstructionTable(SimpleChangeTable): - columns = ("Name", "Count:", "Ratio:") - def calculate_rows(self, stats: Stats) -> Rows: - opcode_stats = stats.get_opcode_stats("opcode") +def 
specialization_effectiveness_section() -> Section: + def calc_specialization_effectiveness_table(stats: Stats) -> Rows: + opcode_stats = stats.get_opcode_stats("opcode") + total = stats.get_total("opcode") + specialized_instructions = stats.specialized_instructions - total = 0 - counts = [] - for name, opcode_stat in opcode_stats.items(): - value = opcode_stat.get("specialization.deferred", 0) - counts.append((value, name)) - total += value - counts.sort(reverse=True) - if total: - return [ - (name, Count(count), Ratio(count, total)) - for (count, name) in counts[:10] - ] + basic = 0 + specialized = 0 + not_specialized = 0 + for name, opcode_stat in opcode_stats.items(): + if "execution_count" not in opcode_stat: + continue + count = opcode_stat["execution_count"] + if "specializable" in opcode_stat: + not_specialized += count + elif name in specialized_instructions: + miss = opcode_stat.get("specialization.miss", 0) + not_specialized += miss + specialized += count - miss else: - return [] + basic += count - class MissesByInstructionTable(SimpleChangeTable): - columns = ("Name", "Count:", "Ratio:") + return [ + ("Basic", Count(basic), Ratio(basic, total)), + ( + "Not specialized", + Count(not_specialized), + Ratio(not_specialized, total), + ), + ("Specialized", Count(specialized), Ratio(specialized, total)), + ] - def calculate_rows(self, stats: Stats) -> Rows: - opcode_stats = stats.get_opcode_stats("opcode") + def calc_deferred_by_table(stats: Stats) -> Rows: + opcode_stats = stats.get_opcode_stats("opcode") - total = 0 - counts = [] - for name, opcode_stat in opcode_stats.items(): - # Avoid double counting misses - if "specializable" in opcode_stat: - continue - value = opcode_stat.get("specialization.misses", 0) - counts.append((value, name)) - total += value - counts.sort(reverse=True) - if total: - return [ - (name, Count(count), Ratio(count, total)) - for (count, name) in counts[:10] - ] - else: - return [] + total = 0 + counts = [] + for name, opcode_stat 
in opcode_stats.items(): + value = opcode_stat.get("specialization.deferred", 0) + counts.append((value, name)) + total += value + counts.sort(reverse=True) + if total: + return [ + (name, Count(count), Ratio(count, total)) + for (count, name) in counts[:10] + ] + else: + return [] - yield SpecializationEffectivenessTable() - yield Section("Deferred by instruction", "", [DeferredByInstructionTable()]) - yield Section("Misses by instruction", "", [MissesByInstructionTable()]) + def calc_misses_by_table(stats: Stats) -> Rows: + opcode_stats = stats.get_opcode_stats("opcode") + + total = 0 + counts = [] + for name, opcode_stat in opcode_stats.items(): + # Avoid double counting misses + if "specializable" in opcode_stat: + continue + value = opcode_stat.get("specialization.misses", 0) + counts.append((value, name)) + total += value + counts.sort(reverse=True) + if total: + return [ + (name, Count(count), Ratio(count, total)) + for (count, name) in counts[:10] + ] + else: + return [] + def iter_specialization_effectiveness_tables( + base_stats: Stats, head_stats: Stats | None = None + ): + yield Table( + ("Instructions", "Count:", "Ratio:"), + calc_specialization_effectiveness_table, + JoinMode.CHANGE, + ) + yield Section( + "Deferred by instruction", + "", + [ + Table( + ("Name", "Count:", "Ratio:"), + calc_deferred_by_table, + JoinMode.CHANGE, + ) + ], + ) + yield Section( + "Misses by instruction", + "", + [ + Table( + ("Name", "Count:", "Ratio:"), + calc_misses_by_table, + JoinMode.CHANGE, + ) + ], + ) -class CallStatsTable(SimpleChangeTable): - columns = ("", "Count:", "Ratio:") + return Section( + "Specialization effectiveness", + "", + iter_specialization_effectiveness_tables, + ) - sort_joined_rows = staticmethod(dont_sort) - def calculate_rows(self, stats: Stats) -> Rows: +def call_stats_section() -> Section: + def calc_call_stats_table(stats: Stats) -> Rows: defines = stats.pystats_defines total = 0 @@ -720,11 +761,21 @@ def calculate_rows(self, stats: Stats) 
-> Rows: return rows + return Section( + "Call stats", + "Inlined calls and frame stats", + [ + Table( + ("", "Count:", "Ratio:"), + calc_call_stats_table, + JoinMode.CHANGE, + ) + ], + ) -class ObjectStatsTable(SimpleChangeTable): - columns = ("", "Count:", "Ratio:") - def calculate_rows(self, stats: Stats) -> Rows: +def object_stats_section() -> Section: + def calc_object_stats_table(stats: Stats) -> Rows: total_materializations = stats.get("Object new values", 0) total_allocations = stats.get("Object allocations", 0) + stats.get( "Object allocations from freelist", 0 @@ -753,11 +804,21 @@ def calculate_rows(self, stats: Stats) -> Rows: rows.append((label, Count(value), ratio)) return rows + return Section( + "Object stats", + "allocations, frees and dict materializatons", + [ + Table( + ("", "Count:", "Ratio:"), + calc_object_stats_table, + JoinMode.CHANGE, + ) + ], + ) -class GCStatsTable(Table): - columns = ("Generation:", "Collections:", "Objects collected:", "Object visits:") - def calculate_rows(self, stats: Stats) -> Rows: +def gc_stats_section() -> Section: + def calc_gc_stats(stats: Stats) -> Rows: gc_stats: list[dict[str, int]] = [] for key, value in stats.items(): if not key.startswith("GC"): @@ -778,239 +839,260 @@ def calculate_rows(self, stats: Stats) -> Rows: for (i, gen) in enumerate(gc_stats) ] + return Section( + "GC stats", + "GC collections and effectiveness", + [ + Table( + ("Generation:", "Collections:", "Objects collected:", "Object visits:"), + calc_gc_stats, + ) + ], + ) -def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None): - class OptimizationStatsTable(SimpleChangeTable): - columns = ("", "Count:", "Ratio:") - - sort_joined_rows = staticmethod(dont_sort) - - def calculate_rows(self, stats: Stats) -> Rows: - if "Optimization attempts" not in stats: - return [] - - attempts = stats["Optimization attempts"] - created = stats["Optimization traces created"] - executed = stats["Optimization traces executed"] - 
uops = stats["Optimization uops executed"] - trace_stack_overflow = stats["Optimization trace stack overflow"] - trace_stack_underflow = stats["Optimization trace stack underflow"] - trace_too_long = stats["Optimization trace too long"] - trace_too_short = stats["Optimization trace too short"] - inner_loop = stats["Optimization inner loop"] - recursive_call = stats["Optimization recursive call"] - - return [ - ("Optimization attempts", Count(attempts), ""), - ("Traces created", Count(created), Ratio(created, attempts)), - ("Traces executed", Count(executed), ""), - ("Uops executed", Count(uops), CountPer(uops, executed)), - ( - "Trace stack overflow", - Count(trace_stack_overflow), - Ratio(trace_stack_overflow, created), - ), - ( - "Trace stack underflow", - Count(trace_stack_underflow), - Ratio(trace_stack_underflow, created), - ), - ( - "Trace too long", - Count(trace_too_long), - Ratio(trace_too_long, created), - ), - ( - "Trace too short", - Count(trace_too_short), - Ratio(trace_too_short, created), - ), - ("Inner loop found", Count(inner_loop), Ratio(inner_loop, created)), - ( - "Recursive call", - Count(recursive_call), - Ratio(recursive_call, created), - ), - ] - class HistogramTable(SimpleChangeTable): - columns = ("Range", "Count:", "Ratio:") +def optimization_section() -> Section: + def calc_optimization_table(stats: Stats) -> Rows: + if "Optimization attempts" not in stats: + return [] - def __init__(self, key: str, den: str): - self.key = key - self.den = den + attempts = stats["Optimization attempts"] + created = stats["Optimization traces created"] + executed = stats["Optimization traces executed"] + uops = stats["Optimization uops executed"] + trace_stack_overflow = stats["Optimization trace stack overflow"] + trace_stack_underflow = stats["Optimization trace stack underflow"] + trace_too_long = stats["Optimization trace too long"] + trace_too_short = stats["Optimization trace too short"] + inner_loop = stats["Optimization inner loop"] + 
recursive_call = stats["Optimization recursive call"] - sort_joined_rows = staticmethod(dont_sort) + return [ + ("Optimization attempts", Count(attempts), ""), + ("Traces created", Count(created), Ratio(created, attempts)), + ("Traces executed", Count(executed), ""), + ("Uops executed", Count(uops), Ratio(uops, executed, percentage=False)), + ( + "Trace stack overflow", + Count(trace_stack_overflow), + Ratio(trace_stack_overflow, created), + ), + ( + "Trace stack underflow", + Count(trace_stack_underflow), + Ratio(trace_stack_underflow, created), + ), + ( + "Trace too long", + Count(trace_too_long), + Ratio(trace_too_long, created), + ), + ( + "Trace too short", + Count(trace_too_short), + Ratio(trace_too_short, created), + ), + ("Inner loop found", Count(inner_loop), Ratio(inner_loop, created)), + ( + "Recursive call", + Count(recursive_call), + Ratio(recursive_call, created), + ), + ] - def calculate_rows(self, stats: Stats) -> Rows: + def calc_histogram_table(key: str, den: str) -> RowCalculator: + def calc(stats: Stats) -> Rows: rows: Rows = [] last_non_zero = 0 for k, v in stats.items(): - if k.startswith(self.key): + if k.startswith(key): match = re.match(r".+\[([0-9]+)\]", k) if match is not None: entry = int(match.groups()[0]) if v != 0: last_non_zero = len(rows) rows.append( - (f"<= {entry:,d}", Count(v), Ratio(int(v), stats[self.den])) + ( + f"<= {entry:,d}", + Count(v), + Ratio(int(v), stats[den]), + ) ) # Don't include any zero entries at the end rows = rows[: last_non_zero + 1] return rows - class UnsupportedOpcodesTable(SimpleChangeTable): - columns = ("Opcode", "Count:") - - def calculate_rows(self, stats: Stats) -> Rows: - unsupported_opcodes = stats.get_opcode_stats("unsupported_opcode") - data = [] - for opcode, entry in unsupported_opcodes.items(): - data.append((Count(entry["count"]), opcode)) - data.sort(reverse=True) - return [(x[1], x[0]) for x in data] - - if "Optimization attempts" not in base_stats or ( - head_stats is not None and 
"Optimization attempts" not in head_stats - ): - return + return calc - yield OptimizationStatsTable() + def calc_unsupported_opcodes_table(stats: Stats) -> Rows: + unsupported_opcodes = stats.get_opcode_stats("unsupported_opcode") + data = [] + for opcode, entry in unsupported_opcodes.items(): + data.append((Count(entry["count"]), opcode)) + data.sort(reverse=True) + return [(x[1], x[0]) for x in data] - for name, den in [ - ("Trace length", "Optimization traces created"), - ("Optimized trace length", "Optimization traces created"), - ("Trace run length", "Optimization traces executed"), - ]: - yield Section(f"{name} histogram", "", [HistogramTable(name, den)]) + def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None): + if "Optimization attempts" not in base_stats or ( + head_stats is not None and "Optimization attempts" not in head_stats + ): + return - yield Section("Uop stats", "", [ExecutionCountTable("uops")]) - yield Section("Unsupported opcodes", "", [UnsupportedOpcodesTable()]) + yield Table(("", "Count:", "Ratio:"), calc_optimization_table, JoinMode.CHANGE) + for name, den in [ + ("Trace length", "Optimization traces created"), + ("Optimized trace length", "Optimization traces created"), + ("Trace run length", "Optimization traces executed"), + ]: + yield Section( + f"{name} histogram", + "", + [ + Table( + ("Range", "Count:", "Ratio:"), + calc_histogram_table(name, den), + JoinMode.CHANGE, + ) + ], + ) + yield Section( + "Uop stats", + "", + [ + Table( + ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), + calc_execution_count_table("uops"), + JoinMode.CHANGE_ONE_COLUMN, + ) + ], + ) + yield Section( + "Unsupported opcodes", + "", + [ + Table( + ("Opcode", "Count:"), + calc_unsupported_opcodes_table, + JoinMode.CHANGE, + ) + ], + ) + return Section( + "Optimization (Tier 2) stats", + "statistics about the Tier 2 optimizer", + iter_optimization_tables, + ) -class MetaStatsTable(Table): - columns = ("", "Count:") - def 
calculate_rows(self, stats: Stats) -> Rows: +def meta_stats_section() -> Section: + def calc_rows(stats: Stats) -> Rows: return [("Number of data files", Count(stats.get("__nfiles__", 0)))] + return Section( + "Meta stats", + "Meta statistics", + [Table(("", "Count:"), calc_rows, JoinMode.CHANGE)], + ) -class Section: - def __init__( - self, - title: str, - summary: str = "", - part_iter=None, - comparative: bool = True, - ): - self.title = title - if not summary: - self.summary = title.lower() - else: - self.summary = summary - if part_iter is None: - part_iter = [] - if isinstance(part_iter, list): - - def iter_parts(base_stats: Stats, head_stats: Stats | None): - yield from part_iter - self.part_iter = iter_parts +LAYOUT = [ + execution_count_section(), + pair_count_section(), + pre_succ_pairs_section(), + specialization_section(), + specialization_effectiveness_section(), + call_stats_section(), + gc_stats_section(), + optimization_section(), + meta_stats_section(), +] + + +def output_markdown( + out: TextIO, + obj: Section | Table | list, + base_stats: Stats, + head_stats: Stats | None = None, + level: int = 2, +) -> None: + def to_markdown(x): + if hasattr(x, "markdown"): + return x.markdown() + elif isinstance(x, str): + return x + elif x is None: + return "" else: - self.part_iter = part_iter - self.comparative = comparative - - def output_markdown( - self, - out: TextIO, - base_stats: Stats, - head_stats: Stats | None = None, - level: int = 1, - ) -> None: - if self.title: - print("#" * level, self.title, file=out) - print(file=out) - print("
", file=out) - print("", self.summary, "", file=out) + raise TypeError(f"Can't convert {x} to markdown") + + match obj: + case Section(): + if obj.title: + print("#" * level, obj.title, file=out) + print(file=out) + print("
", file=out) + print("", obj.summary, "", file=out) + print(file=out) + if head_stats is not None and obj.comparative is False: + print("Not included in comparative output.\n") + else: + for part in obj.part_iter(base_stats, head_stats): + output_markdown(out, part, base_stats, head_stats, level=level + 1) print(file=out) - if head_stats is not None and self.comparative is False: - print("Not included in comparative output.\n") - else: - for part in self.part_iter(base_stats, head_stats): - part.output_markdown(out, base_stats, head_stats, level=level + 1) - print(file=out) - if self.title: - print("
", file=out) + if obj.title: + print("
", file=out) + print(file=out) + + case Table(): + header, rows = obj.get_table(base_stats, head_stats) + if len(rows) == 0: + return + + width = len(header) + header_line = "|" + under_line = "|" + for item in header: + under = "---" + if item.endswith(":"): + item = item[:-1] + under += ":" + header_line += item + " | " + under_line += under + "|" + print(header_line, file=out) + print(under_line, file=out) + for row in rows: + if len(row) != width: + raise ValueError( + "Wrong number of elements in row '" + str(row) + "'" + ) + print("|", " | ".join(to_markdown(i) for i in row), "|", file=out) print(file=out) + case list(): + for part in obj: + output_markdown(out, part, base_stats, head_stats, level=level) -LAYOUT = Section( - "", - "", - [ - Section( - "Execution counts", - "execution counts for all instructions", - [ExecutionCountTable("opcode")], - ), - Section( - "Pair counts", - "Pair counts for top 100 pairs", - [PairCountTable()], - comparative=False, - ), - Section( - "Predecessor/Successor Pairs", - "Top 5 predecessors and successors of each opcode", - iter_pre_succ_pairs_tables, - comparative=False, - ), - Section( - "Specialization stats", - "specialization stats by family", - iter_specialization_tables, - ), - Section( - "Specialization effectiveness", - "", - iter_specialization_effectiveness_tables, - ), - Section("Call stats", "Inlined calls and frame stats", [CallStatsTable()]), - Section( - "Object stats", - "allocations, frees and dict materializatons", - [ObjectStatsTable()], - ), - Section("GC stats", "GC collections and effectiveness", [GCStatsTable()]), - Section( - "Optimization (Tier 2) stats", - "statistics about the Tier 2 optimizer", - iter_optimization_tables, - ), - Section("Meta stats", "Meta statistics", [MetaStatsTable()]), - ], -) - - -def output_markdown(out: TextIO, base_stats: Stats, head_stats: Stats | None = None): - LAYOUT.output_markdown(out, base_stats, head_stats) - print("---", file=out) - print("Stats gathered on:", 
date.today(), file=out) + print("---", file=out) + print("Stats gathered on:", date.today(), file=out) def output_stats(inputs: list[Path], json_output=TextIO | None): - if len(inputs) == 1: - stats = Stats(Path(inputs[0])) - if json_output is not None: - stats.save(json_output) - output_markdown(sys.stdout, stats) - elif len(inputs) == 2: - if json_output is not None: - raise ValueError("Can not output to JSON when there are multiple inputs") - - base_stats = Stats(Path(inputs[0])) - head_stats = Stats(Path(inputs[1])) - output_markdown(sys.stdout, base_stats, head_stats) + match len(inputs): + case 1: + stats = Stats(Path(inputs[0])) + if json_output is not None: + stats.save(json_output) # type: ignore + output_markdown(sys.stdout, LAYOUT, stats) + case 2: + if json_output is not None: + raise ValueError( + "Can not output to JSON when there are multiple inputs" + ) + + base_stats = Stats(Path(inputs[0])) + head_stats = Stats(Path(inputs[1])) + output_markdown(sys.stdout, LAYOUT, base_stats, head_stats) def main():