Skip to content

Commit

Permalink
Merge bd9ff74 into 0bce610
Browse files Browse the repository at this point in the history
  • Loading branch information
maximyurchuk authored May 29, 2024
2 parents 0bce610 + bd9ff74 commit 4a86ce1
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .github/actions/build_analytics/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ runs:
# FIXME: target name may be not the same as dir name
export TARGET_NAME=`basename ${{ inputs.build_target }}`
export TARGET_DIR=${{ inputs.build_target }}
export build_preset="${{ inputs.build_preset }}"
export build_target="${{ inputs.build_target }}"
./ya tool bloat --linker-map $TARGET_DIR/$TARGET_NAME.map.lld --input $TARGET_DIR/$TARGET_NAME --save-html ya_bloat_html
./ydb/ci/build_bloat/main.py --build-dir . --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact
./ydb/ci/build_bloat/ydb_upload.py --html-dir-cpp html_cpp_impact --html-dir-headers html_headers_impact
- name: Upload results
shell: bash
Expand Down
43 changes: 30 additions & 13 deletions ydb/ci/build_bloat/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@


def sanitize_path(path: str, base_dir: str) -> str:
prefixes_to_remove = [
base_dir,
os.path.abspath(base_dir),
]

for prefix in prefixes_to_remove:
path = path.removeprefix(prefix)

ya_build_path_chunk = ".ya/build/build_root"
if ya_build_path_chunk in path:
# remove path to before .ya
Expand All @@ -27,8 +19,15 @@ def sanitize_path(path: str, base_dir: str) -> str:
splitted = path.split(os.sep)
del splitted[3:5]
path = os.sep.join(splitted)
else:
# dirty hack: remove all before ydb (repo name) including ydb
ydb_repo_chunk = "ydb/"
if ydb_repo_chunk in path:
# remove path to before ydb with ydb
path = path[path.find(ydb_repo_chunk) + len(ydb_repo_chunk) :]

return "root" + "/" + path

return "ydb/" + path


def get_compile_duration_and_cpp_path(time_trace_path: str) -> tuple[float, str, str]:
Expand Down Expand Up @@ -144,7 +143,7 @@ def gather_time_traces(build_output_dir: str) -> list[str]:
return time_trace_paths


def generate_cpp_bloat(build_output_dir: str) -> dict:
def generate_cpp_bloat(build_output_dir: str, result_dir: str) -> dict:
time_trace_paths = gather_time_traces(build_output_dir)

result = []
Expand All @@ -157,6 +156,9 @@ def generate_cpp_bloat(build_output_dir: str) -> dict:
result.sort()

tree = {"name": "/"}

cpp_compilation_times = []
total_compilation_time = 0.0

for duration, path, time_trace_path in result:
splitted = path.split(os.sep)
Expand All @@ -167,7 +169,22 @@ def generate_cpp_bloat(build_output_dir: str) -> dict:
additional_chunks = list(zip(inc_path, "h" * len(inc_path)))
add_to_tree(chunks + additional_chunks, inc_duration / 1000, tree)
print("{} -> {:.2f}s".format(path, duration))

cpp_compilation_times.append({
"path": path,
"time_s": duration,
})
total_compilation_time += duration

os.makedirs(result_dir, exist_ok=True)

human_readable_output = {
"total_compilation_time": total_compilation_time,
"cpp_compilation_times": cpp_compilation_times,
}

with open(os.path.join(result_dir, "output.json"), "w") as f:
json.dump(human_readable_output, f, indent=4)

propogate_area(tree)
enrich_names_with_sec(tree)

Expand Down Expand Up @@ -222,7 +239,7 @@ def parse_includes(path: str) -> list[tuple[int, str]]:
return path_to_time


def generate_header_bloat(build_output_dir: str) -> dict:
def generate_header_bloat(build_output_dir: str, result_dir: str) -> dict:
time_trace_paths = gather_time_traces(build_output_dir)

path_to_stat = {} # header path -> (total_duration, count)
Expand Down Expand Up @@ -304,7 +321,7 @@ def main():

for description, fn, output_path in actions:
print("Performing '{}'".format(description))
tree = fn(args.build_dir)
tree = fn(args.build_dir, output_path)

shutil.copytree(html_dir, output_path, dirs_exist_ok=True)
with open(os.path.join(output_path, "bloat.json"), "w") as f:
Expand Down
161 changes: 161 additions & 0 deletions ydb/ci/build_bloat/ydb_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3

import argparse
import copy
import datetime
import json
import os
import ydb
import uuid
import subprocess

DATABASE_PATH = "/ru-central1/b1ggceeul2pkher8vhb6/etn6d1qbals0c29ho4lf"

# CI metadata columns copied verbatim from the environment into every row.
FROM_ENV_COLUMNS = [
    "GITHUB_HEAD_REF",
    "GITHUB_WORKFLOW",
    "GITHUB_WORKFLOW_REF",
    "GITHUB_SHA",
    "GITHUB_REPOSITORY",
    "GITHUB_EVENT_NAME",
    "GITHUB_REF_TYPE",
    "GITHUB_REF_NAME",
    "GITHUB_REF",
    "build_preset",
    "build_target",
]

# Column-name groupings used by generate_column_types() to pick the YDB
# primitive type for each column of an upserted row.
UTF8_COLUMNS = [val.lower() for val in FROM_ENV_COLUMNS] + [
    "id",
    "git_commit_message",
    "path",
]

DATETIME_COLUMNS = [
    "git_commit_time",
]

UINT64_COLUMNS = []

DOUBLE_COLUMNS = ["total_compilation_time_s", "compilation_time_s"]

# FIX: DOUBLE_COLUMNS was previously left out, so ALL_COLUMNS did not
# actually list all columns.
ALL_COLUMNS = UTF8_COLUMNS + DATETIME_COLUMNS + UINT64_COLUMNS + DOUBLE_COLUMNS


def sanitize_str(s):
    """Return *s* unchanged, or a placeholder when it is empty/None/falsy.

    NOTE(review): the placeholder renders as ``N\\A`` (a backslash) — this
    looks like a typo for ``N/A``, but it is kept byte-identical because the
    value is already stored in the uploaded rows; confirm before changing.
    """
    if s:
        return s
    return "N\\A"


def generate_column_types(row):
    """Build a ``ydb.BulkUpsertColumns`` schema matching the keys of *row*.

    Each column name is mapped to a YDB primitive type according to the
    UTF8/UINT64/DOUBLE/DATETIME column lists declared at module level.

    Args:
        row: a dict whose keys are the column names to be upserted.

    Returns:
        A ``ydb.BulkUpsertColumns`` describing every column in *row*.

    Raises:
        ValueError: if a column name is not present in any of the lists.
    """
    column_types = ydb.BulkUpsertColumns()
    for column_name in row:
        if column_name in UTF8_COLUMNS:
            column_types = column_types.add_column(column_name, ydb.PrimitiveType.Utf8)
        elif column_name in UINT64_COLUMNS:
            column_types = column_types.add_column(column_name, ydb.PrimitiveType.Uint64)
        elif column_name in DOUBLE_COLUMNS:
            column_types = column_types.add_column(column_name, ydb.PrimitiveType.Double)
        elif column_name in DATETIME_COLUMNS:
            column_types = column_types.add_column(column_name, ydb.PrimitiveType.Datetime)
        else:
            # `assert False` is stripped under `python -O`; raise explicitly so
            # an unmapped column always fails loudly before the upsert.
            raise ValueError("Unknown column for YDB upload: {!r}".format(column_name))
    return column_types


def parse_args():
    """Parse command-line options for the compile-time stats uploader."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--html-dir-cpp",
        required=True,
        help="Path to treemap view of compilation times",
    )
    parser.add_argument(
        "-i",
        "--html-dir-headers",
        default="html_headers_impact",
        required=False,
        help="Path to treemap view of headers impact on cpp compilation",
    )
    parsed = parser.parse_args()
    return parsed


def main():
    """Upload compilation-time statistics produced by build_bloat to YDB.

    Reads ``output.json`` from the ``--html-dir-cpp`` directory, enriches
    every record with CI metadata from the environment and git commit info,
    then bulk-upserts per-file rows into ``cpp_compile_time`` and a single
    aggregate row into ``total_compile_time``.

    Returns:
        1 when the credentials env variable is missing (upload is skipped);
        None on success.
    """
    args = parse_args()

    # Secrets are unavailable e.g. for PRs from forks — skip instead of failing.
    if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
        print("Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping")
        return 1

    # Do not set the 'real' variable from gh workflows because it interferes
    # with ydb tests; set it up locally for this process instead.
    os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ["CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"]

    with ydb.Driver(
        endpoint="grpcs://ydb.serverless.yandexcloud.net:2135",
        database=DATABASE_PATH,
        credentials=ydb.credentials_from_env_variables(),
    ) as driver:
        driver.wait(timeout=10, fail_fast=True)

        github_sha = os.environ.get("GITHUB_SHA", None)

        if github_sha is not None:
            # Commit metadata comes from the local checkout; GITHUB_SHA is
            # assumed to be present in the repository history.
            git_commit_time_bytes = subprocess.check_output(["git", "show", "--no-patch", "--format=%cI", github_sha])
            git_commit_message_bytes = subprocess.check_output(["git", "log", "--format=%s", "-n", "1", github_sha])
            git_commit_time = datetime.datetime.fromisoformat(git_commit_time_bytes.decode("utf-8").strip())
            git_commit_message = git_commit_message_bytes.decode("utf-8").strip()
            git_commit_time_unix = int(git_commit_time.timestamp())
        else:
            git_commit_message = None
            git_commit_time_unix = 0

        common_parameters = {
            "git_commit_time": git_commit_time_unix,
            "git_commit_message": sanitize_str(git_commit_message),
        }

        # build_preset / build_target are part of FROM_ENV_COLUMNS, so they
        # are picked up here together with the GITHUB_* variables (the
        # previous separate build_preset assignment was redundant).
        for column in FROM_ENV_COLUMNS:
            value = os.environ.get(column, None)
            common_parameters[column.lower()] = sanitize_str(value)

        with open(os.path.join(args.html_dir_cpp, "output.json")) as f:
            cpp_stats = json.load(f)

        rows = []
        for entry in cpp_stats["cpp_compilation_times"]:
            # Each per-file row shares the common metadata plus its own id.
            row = copy.copy(common_parameters)
            row["path"] = sanitize_str(entry["path"])
            row["compilation_time_s"] = entry["time_s"]
            row["id"] = str(uuid.uuid4())
            rows.append(row)

        if rows:
            driver.table_client.bulk_upsert(
                DATABASE_PATH + "/code-agility/cpp_compile_time", rows, generate_column_types(rows[0])
            )

        # One aggregate row with the total build compilation time.
        row = copy.copy(common_parameters)
        row["id"] = str(uuid.uuid4())
        row["total_compilation_time_s"] = cpp_stats["total_compilation_time"]

        driver.table_client.bulk_upsert(
            DATABASE_PATH + "/code-agility/total_compile_time", [row], generate_column_types(row)
        )


if __name__ == "__main__":
    # `exit()` is injected by the `site` module and may be absent (e.g. under
    # `python -S`); raising SystemExit is the always-available equivalent.
    raise SystemExit(main())
0 comments on commit 4a86ce1

Please sign in to comment.