Skip to content

Commit

Permalink
Merge 06b94d5 into e485080
Browse files Browse the repository at this point in the history
  • Loading branch information
maximyurchuk authored Jun 3, 2024
2 parents e485080 + 06b94d5 commit 0bf9f72
Showing 1 changed file with 95 additions and 24 deletions.
119 changes: 95 additions & 24 deletions ydb/ci/build_bloat/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@

import argparse
import json
from functools import partial
import os
import shutil
from concurrent.futures import ProcessPoolExecutor

HEADER_COMPILE_TIME_TO_SHOW = 0.5 # sec


def sanitize_path(path: str, base_src_dir: str) -> str:
home_dir = os.environ["HOME"]
ya_build_path_chunk = ".ya/build/build_root"
ya_tools_path_chunk = ".ya/tools"
if ya_build_path_chunk in path:
# strip everything that precedes .ya
path = path[path.find(ya_build_path_chunk) :]
Expand All @@ -19,10 +21,19 @@ def sanitize_path(path: str, base_src_dir: str) -> str:
splitted = path.split(os.sep)
del splitted[3:5]
path = os.sep.join(splitted)
elif ya_tools_path_chunk in path:
# strip everything that precedes .ya
path = path[path.find(ya_tools_path_chunk) :]

# remove temporary nodes dir names
splitted = path.split(os.sep)
del splitted[3]
path = os.sep.join(splitted)
else:
if not base_src_dir.endswith("/"):
base_src_dir += "/"
path = path.removeprefix(base_src_dir)
path = path.removeprefix(home_dir)

return "src/" + path

Expand Down Expand Up @@ -190,68 +201,98 @@ def generate_cpp_bloat(build_output_dir: str, result_dir: str, base_src_dir: str
return tree


def main(build_output_dir, html_output):
tree = generate_time_bloat(build_output_dir=build_output_dir)

shutil.copytree("html", html_output, dirs_exist_ok=True)

with open(os.path.join(html_output, "bloat.json"), "w") as f:
f.write("var kTree = ")
json.dump(tree, f, indent=4)
def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int, str]], dict]:
print("Processing includes in {}".format(trace_path))


def parse_includes(path: str) -> list[tuple[int, str]]:
print("Processing includes in {}".format(path))

with open(path) as f:
with open(trace_path) as f:
obj = json.load(f)

cpp_file = None
include_events = [] # (time, +-1, path)

for event in obj["traceEvents"]:
if event["name"] == "Source":
path = event["args"]["detail"]
path = sanitize_path(path, base_src_dir)
time_stamp = event["ts"]
duration = event["dur"]
include_events.append((time_stamp, +1, path))
include_events.append((time_stamp + duration, -1, path))

if event["name"] == "OptModule":
cpp_file = event["args"]["detail"]

include_events.sort(key=lambda event: (event[0], -event[1]))

path_to_time = {}
current_includes_stack = []
last_time_stamp = None
current_includes_stack = [(cpp_file, 0)]
last_time_stamp = 0
time_parts = {} # header/cpp -> (header -> (cnt, total time))
if cpp_file is None:
print("Can't determine cpp file for {}".format(trace_path))
return path_to_time, time_parts

for time_stamp, ev, path in include_events:
if current_includes_stack:
last_path = current_includes_stack[-1]
last_path, _ = current_includes_stack[-1]
prev = path_to_time.get(last_path, 0)
path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000

if ev == 1:
current_includes_stack.append(path)
current_includes_stack.append((path, time_stamp))
else:
assert current_includes_stack[-1] == path
current_path, include_ts = current_includes_stack[-1]
assert current_path == path
current_includes_stack.pop()
parent_path = current_includes_stack[-1][0]
if parent_path not in time_parts:
time_parts[parent_path] = {}

if current_path not in time_parts[parent_path]:
time_parts[parent_path][current_path] = [0, 0]

time_parts[parent_path][current_path][0] += 1
time_parts[parent_path][current_path][1] += (time_stamp - include_ts) / 1000 / 1000

last_time_stamp = time_stamp

return path_to_time
return path_to_time, time_parts


def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir: str) -> dict:
time_trace_paths = gather_time_traces(build_output_dir)

path_to_stat = {} # header path -> (total_duration, count)
total_time_parts = {} # header/cpp path -> (header -> (inclusion count, time spent))
with ProcessPoolExecutor() as executor:
res = executor.map(parse_includes, time_trace_paths)
for fn_res in res:
for path, duration in fn_res.items():
path = sanitize_path(path, base_src_dir)
fn = partial(parse_includes, base_src_dir=base_src_dir)
res = executor.map(fn, time_trace_paths)
for path_to_time, time_parts in res:
for path, duration in path_to_time.items():
if path not in path_to_stat:
path_to_stat[path] = [0, 0]
path_to_stat[path][0] += duration
path_to_stat[path][1] += 1

for path in time_parts:
if path not in total_time_parts:
total_time_parts[path] = {}

for subpath in time_parts[path]:
if subpath not in total_time_parts[path]:
total_time_parts[path][subpath] = [0, 0]

total_time_parts[path][subpath][0] += time_parts[path][subpath][0]
total_time_parts[path][subpath][1] += time_parts[path][subpath][1]

for path in total_time_parts:
print("*** {}".format(path))
for subpath in total_time_parts[path]:
count, total_time_ms = total_time_parts[path][subpath]
print(" {} -> total {:.2f}s (included {} times)".format(subpath, total_time_ms, count))
print("")


result = []

for path, (duration, cnt) in path_to_stat.items():
Expand All @@ -260,13 +301,43 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:

tree = {}

headers_compile_duration = []

for duration, cnt, path in result:
path_chunks = path.split(os.sep)
path_chunks[-1] = path_chunks[-1] + " (total {} times)".format(cnt)
path_chunks_count = len(path_chunks)
chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"]))
add_to_tree(chunks, int(duration * 1000), tree)
print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt))
headers_compile_duration.append({
"path": path,
"inclusion_count": cnt,
"mean_compilation_time_s": duration / cnt,
})

time_parts = {}

for path in total_time_parts:
time_part = []
for subpath in total_time_parts[path]:
inclusion_count, total_s = total_time_parts[path][subpath]
time_part.append({
"path": subpath,
"inclusion_count": inclusion_count,
"total_time_s": total_s,
})
time_part.sort(key=lambda val: -val["total_time_s"])
time_parts[path] = time_part


human_readable_output = {
"headers_compile_duration": headers_compile_duration,
"time_parts": time_parts,
}

with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)

propogate_area(tree)
enrich_names_with_sec(tree)
Expand Down

0 comments on commit 0bf9f72

Please sign in to comment.