Skip to content

Commit

Permalink
more
Browse files Browse the repository at this point in the history
  • Loading branch information
maximyurchuk committed Jun 4, 2024
1 parent 743d47e commit d0bbe8b
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 22 deletions.
54 changes: 34 additions & 20 deletions ydb/ci/build_bloat/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

HEADER_COMPILE_TIME_TO_SHOW = 0.5 # sec


def sanitize_path(path: str, base_src_dir: str) -> str:
home_dir = os.environ["HOME"]
ya_build_path_chunk = ".ya/build/build_root"
Expand Down Expand Up @@ -121,7 +122,6 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l
result = []

for time_stamp, ev, path, duration in include_events:

if current_includes_stack:
last_path = current_includes_stack[-1]
prev = path_to_time.get(last_path, 0)
Expand Down Expand Up @@ -222,22 +222,34 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int,
if event["name"] == "OptModule":
cpp_file = event["args"]["detail"]

include_events.sort(key=lambda event: (event[0], -event[1]))

path_to_time = {}
current_includes_stack = [(cpp_file, 0)]
last_time_stamp = 0
time_breakdown = {} # maps header/cpp path -> {included header path -> [inclusion count, total time in seconds]}

if cpp_file is None:
print("Can't determine cpp file for {}".format(trace_path))
return path_to_time, time_breakdown

include_events.sort(key=lambda event: (event[0], -event[1]))

cpp_file = sanitize_path(cpp_file, base_src_dir)
current_includes_stack = [(cpp_file, 0)]
for time_stamp, ev, path in include_events:
if current_includes_stack:
last_path, _ = current_includes_stack[-1]
prev = path_to_time.get(last_path, 0)
path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000

# Also record the time spent in the file itself as a "self" entry in its own breakdown.
if last_path not in time_breakdown:
time_breakdown[last_path] = {}

if last_path not in time_breakdown[last_path]:
time_breakdown[last_path][last_path] = [0, 0]

time_breakdown[last_path][last_path][0] = 1 # NB: the self entry's inclusion count is always exactly 1 (assigned, not accumulated)
time_breakdown[last_path][last_path][1] += (time_stamp - last_time_stamp) / 1000 / 1000

if ev == 1:
current_includes_stack.append((path, time_stamp))
else:
Expand All @@ -247,13 +259,13 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int,
parent_path = current_includes_stack[-1][0]
if parent_path not in time_breakdown:
time_breakdown[parent_path] = {}

if current_path not in time_breakdown[parent_path]:
time_breakdown[parent_path][current_path] = [0, 0]

time_breakdown[parent_path][current_path][0] += 1
time_breakdown[parent_path][current_path][1] += (time_stamp - include_ts) / 1000 / 1000

last_time_stamp = time_stamp

return path_to_time, time_breakdown
Expand Down Expand Up @@ -284,15 +296,14 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:

total_time_breakdown[path][subpath][0] += time_breakdown[path][subpath][0]
total_time_breakdown[path][subpath][1] += time_breakdown[path][subpath][1]

for path in total_time_breakdown:
print("*** {}".format(path))
for subpath in total_time_breakdown[path]:
count, total_time_ms = total_time_breakdown[path][subpath]
print(" {} -> total {:.2f}s (included {} times)".format(subpath, total_time_ms, count))
print("")


result = []

for path, (duration, cnt) in path_to_stat.items():
Expand All @@ -310,27 +321,30 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"]))
add_to_tree(chunks, int(duration * 1000), tree)
print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt))
headers_compile_duration.append({
"path": path,
"inclusion_count": cnt,
"mean_compilation_time_s": duration / cnt,
})
headers_compile_duration.append(
{
"path": path,
"inclusion_count": cnt,
"mean_compilation_time_s": duration / cnt,
}
)

time_breakdown = {}

for path in total_time_breakdown:
one_file_breakdown = []
for subpath in total_time_breakdown[path]:
inclusion_count, total_s = total_time_breakdown[path][subpath]
one_file_breakdown.append({
"path": subpath,
"inclusion_count": inclusion_count,
"total_time_s": total_s,
})
one_file_breakdown.append(
{
"path": subpath,
"inclusion_count": inclusion_count,
"total_time_s": total_s,
}
)
one_file_breakdown.sort(key=lambda val: -val["total_time_s"])
time_breakdown[path] = one_file_breakdown


human_readable_output = {
"headers_compile_duration": headers_compile_duration,
"time_breakdown": time_breakdown,
Expand Down
60 changes: 58 additions & 2 deletions ydb/ci/build_bloat/ydb_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,23 @@
"id",
"git_commit_message",
"path",
"sub_path",
]

DATETIME_COLUMNS = [
"git_commit_time",
]

UINT64_COLUMNS = []
UINT64_COLUMNS = [
"inclusion_count",
]

DOUBLE_COLUMNS = ["total_compilation_time_s", "compilation_time_s"]
DOUBLE_COLUMNS = [
"total_compilation_time_s",
"compilation_time_s",
"mean_compilation_time_s",
"total_time_s",
]

ALL_COLUMNS = UTF8_COLUMNS + DATETIME_COLUMNS + UINT64_COLUMNS

Expand Down Expand Up @@ -125,8 +133,12 @@ def main():
with open(os.path.join(args.html_dir_cpp, "output.json")) as f:
cpp_stats = json.load(f)

with open(os.path.join(args.html_dir_headers, "output.json")) as f:
header_stats = json.load(f)

rows = []

# upload into cpp_compile_time
for entry in cpp_stats["cpp_compilation_times"]:
path = entry["path"]
time_s = entry["time_s"]
Expand All @@ -142,6 +154,7 @@ def main():
DATABASE_PATH + "/code-agility/cpp_compile_time", rows, generate_column_types(row)
)

# upload into total_compile_time
row = copy.copy(common_parameters)
row["id"] = str(uuid.uuid4())
row["total_compilation_time_s"] = cpp_stats["total_compilation_time"]
Expand All @@ -150,6 +163,49 @@ def main():
DATABASE_PATH + "/code-agility/total_compile_time", [row], generate_column_types(row)
)

# upload into headers_impact
rows = []
for entry in header_stats["headers_compile_duration"]:
path = entry["path"]
inclusion_count = entry["inclusion_count"]
mean_compilation_time_s = entry["mean_compilation_time_s"]
row = copy.copy(common_parameters)
row["id"] = str(uuid.uuid4())
row["path"] = sanitize_str(path)
row["mean_compilation_time_s"] = mean_compilation_time_s
row["inclusion_count"] = inclusion_count
rows.append(copy.copy(row))

if rows:
row = rows[0]
driver.table_client.bulk_upsert(
DATABASE_PATH + "/code-agility/headers_impact", rows, generate_column_types(row)
)

# upload into compile_breakdown
rows = []
for path in header_stats["time_breakdown"]:
entry = header_stats["time_breakdown"][path]
for sub_entry in entry:
sub_path = sub_entry["path"]
inclusion_count = sub_entry["inclusion_count"]
total_time_s = sub_entry["total_time_s"]

row = copy.copy(common_parameters)
row["id"] = str(uuid.uuid4())
row["path"] = path
row["sub_path"] = sub_path
row["inclusion_count"] = inclusion_count
row["total_time_s"] = total_time_s

rows.append(copy.copy(row))

if rows:
row = rows[0]
driver.table_client.bulk_upsert(
DATABASE_PATH + "/code-agility/compile_breakdown", rows, generate_column_types(row)
)


if __name__ == "__main__":
exit(main())

0 comments on commit d0bbe8b

Please sign in to comment.