SPMI: Avoid reading entire details CSV file into memory (dotnet#99116)
Switch to a streaming approach when processing the details CSV file to
reduce peak memory usage. We still need to save the rows with diffs to be
able to pick examples to disassemble, but that list is expected to be much
smaller than the full set of methods replayed.
jakobbotsch committed Feb 29, 2024
1 parent e6bce51 commit 0eb3a6f
Showing 1 changed file with 17 additions and 15 deletions.
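The change boils down to two things: read the details CSV as a stream instead of materializing it with list(reader), and retain only the rows that have diffs. Below is a minimal sketch of that pattern, illustrative only and not the patched superpmi.py itself; it assumes a details CSV with a "Has diff" column, the same column the diff below checks.

import csv

def read_csv_streaming(path):
    # Yield rows one at a time instead of building the full list, so peak
    # memory is bounded by one row plus whatever the caller chooses to keep.
    with open(path, encoding="utf-8") as csv_file:
        for row in csv.DictReader(csv_file):
            yield row

def collect_diff_rows(path):
    # Keep only rows with diffs; this subset is expected to be far smaller
    # than the full set of replayed methods.
    return [row for row in read_csv_streaming(path) if row.get("Has diff") == "True"]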
32 changes: 17 additions & 15 deletions src/coreclr/scripts/superpmi.py
@@ -508,7 +508,8 @@ def create_artifacts_base_name(coreclr_args, mch_file):
def read_csv(path):
with open(path, encoding="utf-8") as csv_file:
reader = csv.DictReader(csv_file)
return list(reader)
for row in reader:
yield row

def decode_clrjit_build_string(clrjit_path):
""" Obtain information about the compiler that was used to compile the clrjit at the specified path.
@@ -1709,8 +1710,8 @@ def replay(self):
command = [self.superpmi_path] + flags + [self.jit_path, mch_file]
(return_code, replay_output) = run_and_log_return_output(command)

details = read_csv(details_info_file)
print_superpmi_result(return_code, self.coreclr_args, self.aggregate_replay_metrics(details), None)
replay_metrics = self.aggregate_replay_metrics(details_info_file)
print_superpmi_result(return_code, self.coreclr_args, replay_metrics, None)

if return_code != 0:
# Don't report as replay failure missing data (return code 3).
@@ -1751,8 +1752,8 @@ def replay(self):

return result

def aggregate_replay_metrics(self, details):
""" Given the CSV details file output by SPMI for a replay aggregate the
def aggregate_replay_metrics(self, details_file):
""" Given a path to a CSV details file output by SPMI for a replay aggregate the
successes, misses and failures
Returns:
@@ -1762,7 +1763,7 @@ def aggregate_replay_metrics(self, details):
num_successes = 0
num_misses = 0
num_failures = 0
for row in details:
for row in read_csv(details_file):
result = row["Result"]
if result == "Success":
num_successes += 1
@@ -1860,8 +1861,8 @@ def __enter__(self):
def __exit__(self, *args):
self.write_fh.write("\n\n</div></details>\n")

def aggregate_diff_metrics(details):
""" Given the CSV details file output by SPMI for a diff aggregate the metrics.
def aggregate_diff_metrics(details_file):
""" Given the path to a CSV details file output by SPMI for a diff aggregate the metrics.
"""

base_minopts = {"Successful compiles": 0, "Missing compiles": 0, "Failing compiles": 0,
@@ -1873,7 +1874,9 @@ def aggregate_diff_metrics(details):
diff_minopts = base_minopts.copy()
diff_fullopts = base_minopts.copy()

for row in details:
diffs = []

for row in read_csv(details_file):
base_result = row["Base result"]

if row["MinOpts"] == "True":
Expand Down Expand Up @@ -1926,6 +1929,7 @@ def aggregate_diff_metrics(details):
if row["Has diff"] == "True":
base_dict["Contexts with diffs"] += 1
diff_dict["Contexts with diffs"] += 1
diffs.append(row)

base_overall = base_minopts.copy()
for k in base_overall.keys():
@@ -1948,7 +1952,8 @@ def aggregate_diff_metrics(details):
d["Relative PerfScore Geomean (Diffs)"] = 1

return ({"Overall": base_overall, "MinOpts": base_minopts, "FullOpts": base_fullopts},
{"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts})
{"Overall": diff_overall, "MinOpts": diff_minopts, "FullOpts": diff_fullopts},
diffs)


class SuperPMIReplayAsmDiffs:
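aggregate_diff_metrics now returns a third element: the rows that had diffs, collected during the same single streaming pass, so callers no longer need the full details list to recover them. A stubbed sketch of the new return shape (hypothetical helper, not the real implementation):

def aggregate_diff_metrics_stub(rows):
    # Count contexts with diffs for base and diff, and keep those rows.
    base = {"Contexts with diffs": 0}
    diff = {"Contexts with diffs": 0}
    diffs = []
    for row in rows:
        if row.get("Has diff") == "True":
            base["Contexts with diffs"] += 1
            diff["Contexts with diffs"] += 1
            diffs.append(row)
    return base, diff, diffs

base_metrics, diff_metrics, diffs = aggregate_diff_metrics_stub(
    [{"Has diff": "True"}, {"Has diff": "False"}])
assert len(diffs) == 1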
@@ -2150,8 +2155,7 @@ def replay_with_asm_diffs(self):
command = [self.superpmi_path] + flags + [self.base_jit_path, self.diff_jit_path, mch_file]
return_code = run_and_log(command)

details = read_csv(detailed_info_file)
(base_metrics, diff_metrics) = aggregate_diff_metrics(details)
(base_metrics, diff_metrics, diffs) = aggregate_diff_metrics(detailed_info_file)

print_superpmi_result(return_code, self.coreclr_args, base_metrics, diff_metrics)
artifacts_base_name = create_artifacts_base_name(self.coreclr_args, mch_file)
@@ -2171,7 +2175,6 @@ def replay_with_asm_diffs(self):
repro_base_command_line = "{} {} {}".format(self.superpmi_path, " ".join(altjit_asm_diffs_flags), self.diff_jit_path)
save_repro_mc_files(temp_location, self.coreclr_args, artifacts_base_name, repro_base_command_line)

diffs = [r for r in details if r["Has diff"] == "True"]
if any(diffs):
files_with_asm_diffs.append(mch_file)

@@ -2922,8 +2925,7 @@ def replay_with_throughput_diff(self):
command_string = " ".join(command)
logging.debug("'%s': Error return code: %s", command_string, return_code)

details = read_csv(detailed_info_file)
(base_metrics, diff_metrics) = aggregate_diff_metrics(details)
(base_metrics, diff_metrics, _) = aggregate_diff_metrics(detailed_info_file)

if base_metrics is not None and diff_metrics is not None:
base_instructions = base_metrics["Overall"]["Diff executed instructions"]
