Skip to content

Commit

Permalink
feat(#245): junit
Browse files (browse the repository at this point in the history)
  • Loading branch information
h1alexbel committed Dec 5, 2024
1 parent 8dbbde4 commit fe55f80
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
- name: Create
run: |
docker run --rm -v "$(pwd)/output:/collection" \
-e QUERY="stars:>10 language:java,rust size:>=20 mirror:false template:false NOT android" \
-e QUERY="stars:>10 language:java size:>=20 mirror:false template:false NOT android" \
-e START="${{ inputs.start }}" -e END="${{ inputs.end }}" \
-e COLLECT_TOKEN="${{ secrets.COLLECT_TOKEN_1 }}" \
-e GH_TOKEN="${{ secrets.GITHUB_TOKEN }}" \
Expand Down
15 changes: 8 additions & 7 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,14 @@ dataset folder out:

# Cluster repositories.
cluster dir out:
cd sr-train && poetry poe cluster --dataset "{{dir}}/d1-scores.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d2-sbert.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d3-e5.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d4-embedv3.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d5-scores+sbert.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d6-scores+e5.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d7-scores+embedv3.csv" --dir {{out}}
cd sr-train && poetry poe cluster --dataset "{{dir}}/d0-numerical.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d1-scores.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d2-sbert.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d3-e5.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d4-embedv3.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d5-scores+sbert.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d6-scores+e5.csv" --dir {{out}}
# cd sr-train && poetry poe cluster --dataset "{{dir}}/d7-scores+embedv3.csv" --dir {{out}}

# Statistics about generated clusters.
clusterstat out dir="experiment":
Expand Down
5 changes: 5 additions & 0 deletions sr-data/resources/pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,10 @@
"workflows": {
"repos": "@in",
"out": "../after-workflows.csv"
},
"junit": {
"repos": "@in",
"token": "$GH_TOKEN",
"out": "../after-junit.csv"
}
}
2 changes: 1 addition & 1 deletion sr-data/src/sr_data/steps/junit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def main(repos, out, token):
frame = pd.read_csv(repos)
logger.info(f"Counting JUnit tests in {len(frame)} repositories")
for idx, row in frame.iterrows():
frame.at[idx, "junit_tests"] = count_of_tests(
frame.at[idx, "tests"] = count_of_tests(
row["repo"],
row["branch"],
token
Expand Down
3 changes: 2 additions & 1 deletion sr-data/src/sr_data/steps/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def main(repos, out):
"open_issues",
"branches",
"workflows",
"has_release_workflow"
"has_release_workflow",
"tests"
]
]
frame["has_release_workflow"] = frame["has_release_workflow"].astype(int)
Expand Down
6 changes: 3 additions & 3 deletions sr-data/src/tests/resources/to-numerical.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
repo,branch,readme,releases,pulls,open_issues,branches,license,workflows,w_jobs,w_oss,w_steps,has_release_workflow
foo/bar,master,"",0,0,1,1,"MIT",1,2,3,4,True
bar/foo,master,"",0,0,1,1,"MIT",1,2,3,4,False
repo,branch,readme,releases,pulls,open_issues,branches,license,workflows,w_jobs,w_oss,w_steps,has_release_workflow,tests
foo/bar,master,"",0,0,1,1,"MIT",1,2,3,4,True,0
bar/foo,master,"",0,0,1,1,"MIT",1,2,3,4,False,0
4 changes: 2 additions & 2 deletions sr-data/src/tests/test_junit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,5 @@ def test_counts_junit_tests_in_csv(self):
os.environ["GH_TESTING_TOKEN"]
)
frame = pd.read_csv(path)
self.assertEqual(frame.iloc[0]["junit_tests"], 187)
self.assertEqual(frame.iloc[1]["junit_tests"], 0)
self.assertEqual(frame.iloc[0]["tests"], 187)
self.assertEqual(frame.iloc[1]["tests"], 0)
3 changes: 2 additions & 1 deletion sr-data/src/tests/test_numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def test_creates_csv_with_numerical(self):
"open_issues",
"branches",
"workflows",
"has_release_workflow"
"has_release_workflow",
"tests"
]
),
f"Frame {frame.columns} doesn't have expected columns"
Expand Down

0 comments on commit fe55f80

Please sign in to comment.