Skip to content

Commit

Permalink
Update notification service (#17921)
Browse files Browse the repository at this point in the history
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
  • Loading branch information
2 people authored and muellerzr committed Jul 25, 2022
1 parent 28d474c commit 726098f
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 34 deletions.
15 changes: 14 additions & 1 deletion .github/workflows/self-past.yml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/download-artifact@v2

# Create a directory to store test failure tables in the next step
- name: Create directory
run: mkdir test_failure_tables

- name: Send message to Slack
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
Expand All @@ -176,4 +181,12 @@ jobs:
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
if: ${{ always() }}
uses: actions/upload-artifact@v2
with:
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
path: test_failure_tables
115 changes: 82 additions & 33 deletions utils/notification_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,11 @@ def category_failures(self) -> Dict:
individual_reports.append(key)

header = "Single | Multi | Category\n"
category_failures_report = header + "\n".join(sorted(individual_reports))
category_failures_report = prepare_reports(
title="The following modeling categories had failures", header=header, reports=individual_reports
)

return {
"type": "section",
"text": {
"type": "mrkdwn",
"text": f"The following modeling categories had failures:\n\n```\n{category_failures_report}\n```",
},
}
return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}

@property
def model_failures(self) -> Dict:
Expand Down Expand Up @@ -302,21 +298,44 @@ def per_model_sum(model_category_dict):

model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1])
model_failures_report = model_header + "\n".join(sorted_model_reports)
model_failures_report = prepare_reports(
title="These following model modules had failures", header=model_header, reports=sorted_model_reports
)

module_header = "Single | Multi | Category\n"
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1])
module_failures_report = module_header + "\n".join(sorted_module_reports)

report = ""
module_failures_report = prepare_reports(
title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
)

if len(model_reports):
report += f"These following model modules had failures:\n```\n{model_failures_report}\n```\n\n"
model_failure_sections = [
{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
{"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
]

if len(other_module_reports):
report += f"The following non-model modules had failures:\n```\n{module_failures_report}\n```\n\n"
# Save complete tables (for past CI) - to be uploaded as artifacts
if ci_event.startswith("Past CI"):
model_failures_report = prepare_reports(
title="These following model modules had failures",
header=model_header,
reports=sorted_model_reports,
to_truncate=False,
)
file_path = os.path.join(os.getcwd(), "test_failure_tables/model_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(model_failures_report)

module_failures_report = prepare_reports(
title="The following non-model modules had failures",
header=module_header,
reports=sorted_module_reports,
to_truncate=False,
)
file_path = os.path.join(os.getcwd(), "test_failure_tables/module_failures_report.txt")
with open(file_path, "w", encoding="UTF-8") as fp:
fp.write(module_failures_report)

return {"type": "section", "text": {"type": "mrkdwn", "text": report}}
return model_failure_sections

@property
def additional_failures(self) -> Dict:
Expand All @@ -337,15 +356,11 @@ def additional_failures(self) -> Dict:
individual_reports.append(report)

header = "Single | Multi | Category\n"
failures_report = header + "\n".join(sorted(individual_reports))
failures_report = prepare_reports(
title="The following non-modeling tests had failures", header=header, reports=individual_reports
)

return {
"type": "section",
"text": {
"type": "mrkdwn",
"text": f"The following non-modeling tests had failures:\n```\n{failures_report}\n```",
},
}
return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}

@property
def payload(self) -> str:
Expand All @@ -358,7 +373,10 @@ def payload(self) -> str:
blocks.append(self.failures)

if self.n_model_failures > 0:
blocks.extend([self.category_failures, self.model_failures])
blocks.append(self.category_failures)
for block in self.model_failures:
if block["text"]["text"]:
blocks.append(block)

if self.n_additional_failures > 0:
blocks.append(self.additional_failures)
Expand Down Expand Up @@ -582,6 +600,29 @@ def add_path(self, path: str, gpu: str = None):
return _available_artifacts


def prepare_reports(title, header, reports, to_truncate=True):
report = ""

MAX_ERROR_TEXT = 3000 - len("[Truncated]")
if not to_truncate:
MAX_ERROR_TEXT = float("inf")

if len(reports) > 0:
# `text` must be less than 3001 characters in Slack SDK
# keep some room for adding "[Truncated]" when necessary

for idx in range(len(reports)):
_report = header + "\n".join(reports[: idx + 1])
new_report = f"{title}:\n```\n{_report}\n```\n"
if len(new_report) > MAX_ERROR_TEXT:
# `report` here has length <= 3000
report = report + "[Truncated]"
break
report = new_report

return report


if __name__ == "__main__":

org = "huggingface"
Expand Down Expand Up @@ -686,16 +727,24 @@ def add_path(self, path: str, gpu: str = None):

unclassified_model_failures = []

# This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from
# `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example.
job_name_prefix = ""
if ci_event.startswith("Past CI - "):
framework, version = ci_event.replace("Past CI - ", "").split("-")
framework = "PyTorch" if framework == "pytorch" else "TensorFlow"
job_name_prefix = f"{framework} {version}"

for model in model_results.keys():
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
if "stats" in artifact:
# Link to the GitHub Action job
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
)

# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
if job_name_prefix:
job_name = f"{job_name_prefix} / {job_name}"
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
failed, success, time_spent = handle_test_results(artifact["stats"])
model_results[model]["success"] += success
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
Expand Down Expand Up @@ -809,7 +858,7 @@ def add_path(self, path: str, gpu: str = None):

message = Message(title, ci_title, model_results, additional_results)

# send report only if there is any failure
if message.n_failures:
# send report only if there is any failure (for push CI)
if message.n_failures or ci_event != "push":
message.post()
message.post_reply()

0 comments on commit 726098f

Please sign in to comment.