diff --git a/ydb/ci/build_bloat/main.py b/ydb/ci/build_bloat/main.py
index f8c10e231816..317bb7c12ba3 100755
--- a/ydb/ci/build_bloat/main.py
+++ b/ydb/ci/build_bloat/main.py
@@ -9,6 +9,7 @@
 
 HEADER_COMPILE_TIME_TO_SHOW = 0.5  # sec
 
+
 def sanitize_path(path: str, base_src_dir: str) -> str:
     home_dir = os.environ["HOME"]
     ya_build_path_chunk = ".ya/build/build_root"
@@ -121,7 +122,6 @@ def build_include_tree(path: str, build_output_dir: str, base_src_dir: str) -> l
     result = []
 
     for time_stamp, ev, path, duration in include_events:
-
         if current_includes_stack:
             last_path = current_includes_stack[-1]
             prev = path_to_time.get(last_path, 0)
@@ -222,22 +222,34 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int,
         if event["name"] == "OptModule":
             cpp_file = event["args"]["detail"]
 
-    include_events.sort(key=lambda event: (event[0], -event[1]))
-
     path_to_time = {}
-    current_includes_stack = [(cpp_file, 0)]
     last_time_stamp = 0
     time_breakdown = {}  # header/cpp -> (header -> (cnt, total time))
+
     if cpp_file is None:
         print("Can't determine cpp file for {}".format(trace_path))
         return path_to_time, time_breakdown
 
+    include_events.sort(key=lambda event: (event[0], -event[1]))
+
+    cpp_file = sanitize_path(cpp_file, base_src_dir)
+    current_includes_stack = [(cpp_file, 0)]
     for time_stamp, ev, path in include_events:
         if current_includes_stack:
             last_path, _ = current_includes_stack[-1]
             prev = path_to_time.get(last_path, 0)
             path_to_time[last_path] = prev + (time_stamp - last_time_stamp) / 1000 / 1000
 
+            # add compile breakdown for itself
+            if last_path not in time_breakdown:
+                time_breakdown[last_path] = {}
+
+            if last_path not in time_breakdown[last_path]:
+                time_breakdown[last_path][last_path] = [0, 0]
+
+            time_breakdown[last_path][last_path][0] = 1  # NB: just 1
+            time_breakdown[last_path][last_path][1] += (time_stamp - last_time_stamp) / 1000 / 1000
+
         if ev == 1:
             current_includes_stack.append((path, time_stamp))
         else:
@@ -247,13 +259,13 @@ def parse_includes(trace_path: str, base_src_dir: str) -> tuple[list[tuple[int,
                 parent_path = current_includes_stack[-1][0]
                 if parent_path not in time_breakdown:
                     time_breakdown[parent_path] = {}
-
+
                 if current_path not in time_breakdown[parent_path]:
                     time_breakdown[parent_path][current_path] = [0, 0]
-
+
                 time_breakdown[parent_path][current_path][0] += 1
                 time_breakdown[parent_path][current_path][1] += (time_stamp - include_ts) / 1000 / 1000
-
+
         last_time_stamp = time_stamp
 
     return path_to_time, time_breakdown
@@ -284,7 +296,7 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
 
                 total_time_breakdown[path][subpath][0] += time_breakdown[path][subpath][0]
                 total_time_breakdown[path][subpath][1] += time_breakdown[path][subpath][1]
-
+
     for path in total_time_breakdown:
         print("*** {}".format(path))
         for subpath in total_time_breakdown[path]:
@@ -292,7 +304,6 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
             print("  {} -> total {:.2f}s (included {} times)".format(subpath, total_time_ms, count))
         print("")
 
-
     result = []
 
     for path, (duration, cnt) in path_to_stat.items():
@@ -310,11 +321,13 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
         chunks = list(zip(path_chunks, (path_chunks_count - 1) * ["dir"] + ["h"]))
         add_to_tree(chunks, int(duration * 1000), tree)
         print("{} -> {:.2f}s (aggregated {} times)".format(path, duration, cnt))
-        headers_compile_duration.append({
-            "path": path,
-            "inclusion_count": cnt,
-            "mean_compilation_time_s": duration / cnt,
-        })
+        headers_compile_duration.append(
+            {
+                "path": path,
+                "inclusion_count": cnt,
+                "mean_compilation_time_s": duration / cnt,
+            }
+        )
 
     time_breakdown = {}
 
@@ -322,15 +335,16 @@ def generate_header_bloat(build_output_dir: str, result_dir: str, base_src_dir:
         one_file_breakdown = []
         for subpath in total_time_breakdown[path]:
             inclusion_count, total_s = total_time_breakdown[path][subpath]
-            one_file_breakdown.append({
-                "path": subpath,
-                "inclusion_count": inclusion_count,
-                "total_time_s": total_s,
-            })
+            one_file_breakdown.append(
+                {
+                    "path": subpath,
+                    "inclusion_count": inclusion_count,
+                    "total_time_s": total_s,
+                }
+            )
         one_file_breakdown.sort(key=lambda val: -val["total_time_s"])
         time_breakdown[path] = one_file_breakdown
 
-
     human_readable_output = {
         "headers_compile_duration": headers_compile_duration,
         "time_breakdown": time_breakdown,
diff --git a/ydb/ci/build_bloat/ydb_upload.py b/ydb/ci/build_bloat/ydb_upload.py
index c4f235c68dde..0ac50cb10a3a 100755
--- a/ydb/ci/build_bloat/ydb_upload.py
+++ b/ydb/ci/build_bloat/ydb_upload.py
@@ -29,15 +29,23 @@
     "id",
     "git_commit_message",
     "path",
+    "sub_path",
 ]
 
 DATETIME_COLUMNS = [
     "git_commit_time",
 ]
 
-UINT64_COLUMNS = []
+UINT64_COLUMNS = [
+    "inclusion_count",
+]
 
-DOUBLE_COLUMNS = ["total_compilation_time_s", "compilation_time_s"]
+DOUBLE_COLUMNS = [
+    "total_compilation_time_s",
+    "compilation_time_s",
+    "mean_compilation_time_s",
+    "total_time_s",
+]
 
 ALL_COLUMNS = UTF8_COLUMNS + DATETIME_COLUMNS + UINT64_COLUMNS
 
@@ -125,8 +133,12 @@ def main():
     with open(os.path.join(args.html_dir_cpp, "output.json")) as f:
         cpp_stats = json.load(f)
 
+    with open(os.path.join(args.html_dir_headers, "output.json")) as f:
+        header_stats = json.load(f)
+
     rows = []
 
+    # upload into cpp_compile_time
     for entry in cpp_stats["cpp_compilation_times"]:
         path = entry["path"]
         time_s = entry["time_s"]
@@ -142,6 +154,7 @@ def main():
         DATABASE_PATH + "/code-agility/cpp_compile_time", rows, generate_column_types(row)
     )
 
+    # upload into total_compile_time
     row = copy.copy(common_parameters)
     row["id"] = str(uuid.uuid4())
     row["total_compilation_time_s"] = cpp_stats["total_compilation_time"]
@@ -150,6 +163,49 @@ def main():
         DATABASE_PATH + "/code-agility/total_compile_time", [row], generate_column_types(row)
     )
 
+    # upload into headers_impact
+    rows = []
+    for entry in header_stats["headers_compile_duration"]:
+        path = entry["path"]
+        inclusion_count = entry["inclusion_count"]
+        mean_compilation_time_s = entry["mean_compilation_time_s"]
+        row = copy.copy(common_parameters)
+        row["id"] = str(uuid.uuid4())
+        row["path"] = sanitize_str(path)
+        row["mean_compilation_time_s"] = mean_compilation_time_s
+        row["inclusion_count"] = inclusion_count
+        rows.append(copy.copy(row))
+
+    if rows:
+        row = rows[0]
+        driver.table_client.bulk_upsert(
+            DATABASE_PATH + "/code-agility/headers_impact", rows, generate_column_types(row)
+        )
+
+    # upload into compile_breakdown
+    rows = []
+    for path in header_stats["time_breakdown"]:
+        entry = header_stats["time_breakdown"][path]
+        for sub_entry in entry:
+            sub_path = sub_entry["path"]
+            inclusion_count = sub_entry["inclusion_count"]
+            total_time_s = sub_entry["total_time_s"]
+
+            row = copy.copy(common_parameters)
+            row["id"] = str(uuid.uuid4())
+            row["path"] = path
+            row["sub_path"] = sub_path
+            row["inclusion_count"] = inclusion_count
+            row["total_time_s"] = total_time_s
+
+            rows.append(copy.copy(row))
+
+    if rows:
+        row = rows[0]
+        driver.table_client.bulk_upsert(
+            DATABASE_PATH + "/code-agility/compile_breakdown", rows, generate_column_types(row)
+        )
+
 
 if __name__ == "__main__":
     exit(main())
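
Note on the parse_includes() change: the hunk at @@ -222,22 +222,34 @@ both reorders the setup (sorting and stack initialization now happen only after the cpp_file check, with the path run through sanitize_path()) and starts charging each file its own "self" time in time_breakdown. Below is a standalone sketch, not part of the patch, of the resulting stack-replay accounting. It assumes each event is a (time_stamp, ev, path) tuple with microsecond timestamps, that ev == 1 marks entering an include and any other value marks leaving it, and that exits are encoded so the (event[0], -event[1]) sort key processes entries first on timestamp ties, as the surrounding code suggests.

def replay_include_events(cpp_file: str, include_events: list) -> tuple[dict, dict]:
    path_to_time = {}    # file -> seconds spent while it was on top of the stack
    time_breakdown = {}  # parent -> {child: [inclusion_count, total_seconds]}
    last_time_stamp = 0
    # The translation unit itself sits at the bottom of the include stack.
    current_includes_stack = [(cpp_file, 0)]

    for time_stamp, ev, path in sorted(include_events, key=lambda e: (e[0], -e[1])):
        if current_includes_stack:
            # Charge the interval since the previous event to whichever file
            # is on top of the stack right now: its "self" time.
            last_path, _ = current_includes_stack[-1]
            elapsed_s = (time_stamp - last_time_stamp) / 1000 / 1000
            path_to_time[last_path] = path_to_time.get(last_path, 0) + elapsed_s
            # Self time also appears in the breakdown, keyed under the file
            # itself, with the inclusion count pinned to 1 as in the patch.
            self_entry = time_breakdown.setdefault(last_path, {}).setdefault(last_path, [1, 0])
            self_entry[1] += elapsed_s

        if ev == 1:
            # Entering an include: remember when it started.
            current_includes_stack.append((path, time_stamp))
        else:
            # Leaving an include: charge the whole nested duration to its parent.
            current_path, include_ts = current_includes_stack.pop()
            if current_includes_stack:
                parent_path = current_includes_stack[-1][0]
                entry = time_breakdown.setdefault(parent_path, {}).setdefault(current_path, [0, 0])
                entry[0] += 1
                entry[1] += (time_stamp - include_ts) / 1000 / 1000
        last_time_stamp = time_stamp

    return path_to_time, time_breakdown

A file's self time and its children's nested time land in separate buckets of time_breakdown[path], so summing that dictionary over all sub-paths roughly reconstructs the full cost of compiling that file across all its inclusions.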
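On the ydb_upload.py side, the new sub_path, inclusion_count, mean_compilation_time_s, and total_time_s columns only take effect through generate_column_types(row), which is defined elsewhere in the file and not shown in this diff. The following is a hypothetical reconstruction of such a helper, not the file's actual code: the column lists mirror the hunk at @@ -29,15 +29,23 @@ (UTF8_COLUMNS is abbreviated here), and it uses the YDB Python SDK's BulkUpsertColumns to build the schema that driver.table_client.bulk_upsert() expects.

import ydb

# Abbreviated column lists, as in the hunk above (the real UTF8_COLUMNS has
# more entries).
UTF8_COLUMNS = ["id", "git_commit_message", "path", "sub_path"]
DATETIME_COLUMNS = ["git_commit_time"]
UINT64_COLUMNS = ["inclusion_count"]
DOUBLE_COLUMNS = [
    "total_compilation_time_s",
    "compilation_time_s",
    "mean_compilation_time_s",
    "total_time_s",
]

# Hypothetical sketch of generate_column_types(): map each key present in a
# row to the matching YDB column type so bulk_upsert() can type the batch.
def generate_column_types(row: dict) -> ydb.BulkUpsertColumns:
    columns = ydb.BulkUpsertColumns()
    for name in row:
        if name in UTF8_COLUMNS:
            columns.add_column(name, ydb.OptionalType(ydb.PrimitiveType.Utf8))
        elif name in DATETIME_COLUMNS:
            columns.add_column(name, ydb.OptionalType(ydb.PrimitiveType.Datetime))
        elif name in UINT64_COLUMNS:
            columns.add_column(name, ydb.OptionalType(ydb.PrimitiveType.Uint64))
        elif name in DOUBLE_COLUMNS:
            columns.add_column(name, ydb.OptionalType(ydb.PrimitiveType.Double))
    return columns

Because the patch derives the schema from rows[0] (row = rows[0] before each bulk_upsert call), every row in a batch must carry the same keys, which is why the upload loops copy common_parameters and then set the same fields for each entry.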