Skip to content

Commit

Permalink
Merge branch '__rultor'
Browse files Browse the repository at this point in the history
  • Loading branch information
rultor committed Dec 30, 2024
2 parents 3d34512 + f86da23 commit 5bd7d59
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 7 deletions.
34 changes: 32 additions & 2 deletions sr-data/src/sr_data/steps/workflows.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Collect information about GitHub workflows in the repo.
"""
import numpy as np
# The MIT License (MIT)
#
# Copyright (c) 2024 Aliaksei Bialiauski
Expand Down Expand Up @@ -30,7 +31,7 @@

def main(repos, out):
frame = pd.read_csv(repos)
frame["workflows"] = frame["workflows"].fillna("")
frame["workflows"] = frame["workflows"].fillna(0)
for idx, row in frame.iterrows():
repo = row["repo"]
branch = row["branch"]
Expand Down Expand Up @@ -65,13 +66,42 @@ def main(repos, out):
if info["w_release"]:
releases = True
frame.at[idx, "workflows"] = len(ymls)
frame["workflows"] = frame["workflows"]
frame.at[idx, "w_jobs"] = tjobs
frame.at[idx, "w_oss"] = len(set(oss))
frame.at[idx, "w_steps"] = steps
frame.at[idx, "has_release_workflow"] = releases
frame.at[idx, "has_release_workflow"] = int(releases)
frame.at[idx, "w_simplicity"] = w_score(frame.loc[idx])
frame.to_csv(out, index=False)
logger.info(f"Saved repositories to {out}")

# Column names related to the workflow score (presumably the inputs of
# w_score; not referenced elsewhere in the visible code -- TODO confirm).
wscope = [
    "workflows",
    "w_jobs",
    "w_oss",
    "w_steps",
    "has_release_workflow",
]
# Weight of every column in the simplicity score. w_score iterates this
# mapping, so the insertion order below is also the summation order.
weights = dict(
    workflows=0.3,
    w_jobs=0.25,
    w_steps=0.25,
    w_oss=0.1,
    has_release_workflow=0.1,
)

def w_score(row) -> float:
    """
    Workflow simplicity score.
    :param row: Mapping-like row (e.g. a pandas Series) that holds a numeric
     value for every key of the module-level ``weights`` dict.
    :return: Calculated metric for workflow simplicity score: the weighted
     sum of ``1 - value`` over all weighted columns.
    @todo #244:35min Enhance workflow simplicity score with min and max adjustment.
     Currently, we just subtract collected value from 1. We should adjust it with
     min and max values from the dataset. So formula should look like:
     1 - (row - min) / (max - min).
    """
    # Iterate the weights themselves instead of a hand-written copy of their
    # keys, so the set of scored columns is defined in exactly one place.
    return sum((1 - row[key]) * weight for key, weight in weights.items())


def fetch(path, timeout=30) -> str:
    """
    Download a file from raw.githubusercontent.com.
    :param path: Path appended to ``https://raw.githubusercontent.com/``.
    :param timeout: Seconds to wait for the server before giving up; without
     it a stalled connection would block the caller indefinitely.
    :return: Response body decoded as text.
    """
    return requests.get(
        f"https://raw.githubusercontent.com/{path}",
        timeout=timeout
    ).text
Expand Down
2 changes: 2 additions & 0 deletions sr-data/src/tests/resources/to-wscore.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
repo,workflows,w_jobs,w_steps,w_oss,has_release_workflow
foo/bar,1,2,3,3,0
24 changes: 19 additions & 5 deletions sr-data/src/tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
import pandas as pd
import pytest
import yaml
from sr_data.steps.workflows import workflow_info, main, fetch, \
used_for_releases
from sr_data.steps.workflows import workflow_info, main, fetch, used_for_releases, w_score


class TestWorkflows(unittest.TestCase):
Expand Down Expand Up @@ -90,7 +89,7 @@ def test_outputs_workflow_info_correctly(self):
f"Steps count in workflow: '{info}' does not match with expected"
)

@pytest.mark.fast
@pytest.mark.nightly
def test_collects_unique_oss_across_all_files(self):
with TemporaryDirectory() as temp:
path = os.path.join(temp, "workflows.csv")
Expand All @@ -109,7 +108,7 @@ def test_collects_unique_oss_across_all_files(self):
f"OSS count: {oss} does not match with expected: {expected}"
)

@pytest.mark.fast
@pytest.mark.nightly
def test_collects_workflows_for_all(self):
with TemporaryDirectory() as temp:
path = os.path.join(temp, "workflows.csv")
Expand All @@ -129,7 +128,7 @@ def test_collects_workflows_for_all(self):
f"Frame {frame.columns} doesn't have expected columns"
)

@pytest.mark.fast
@pytest.mark.nightly
def test_counts_workflows_correctly(self):
with TemporaryDirectory() as temp:
path = os.path.join(temp, "workflows.csv")
Expand Down Expand Up @@ -403,3 +402,18 @@ def test_parses_oss_as_list_in_matrix(self):
0,
f"Steps count in workflow: '{info}' does not match with expected"
)


@pytest.mark.fast
def test_calculates_simplicity_score(self):
    """
    Check w_score against the single row stored in resources/to-wscore.csv.
    """
    scores = pd.read_csv(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            "resources/to-wscore.csv"
        )
    )
    # The score is a sum of float products, so compare with a tolerance
    # instead of exact equality, which depends on rounding of each addend.
    self.assertAlmostEqual(
        w_score(scores.iloc[0]),
        -0.85,
        places=7,
        msg="Calculated score does not match with expected"
    )

1 comment on commit 5bd7d59

@0pdd
Copy link
Collaborator

@0pdd 0pdd commented on 5bd7d59 Dec 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Puzzle 244-70f0b4e2 discovered in sr-data/src/sr_data/steps/workflows.py and submitted as #291. Please, remember that the puzzle was not necessarily added in this particular commit. Maybe it was added earlier, but we discovered it only now.

Please sign in to comment.