# Workflow file for run #23 of the `datasets` workflow.

# The MIT License (MIT)
#
# Copyright (c) 2024 Aliaksei Bialiauski
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
---
# Workflow name.
name: datasets
# Trigger: manual dispatch only. Every input below is supplied by the person
# starting the run; the job forwards them to the collection container as
# environment variables (STEPS, START, END, OUT, NUMBASE) and to the deploy
# step (clean).
'on':
  workflow_dispatch:
    inputs:
      steps:
        description: "Comma-separated names of steps to be executed"
        required: true
      start:
        description: "Collect repositories created from, in YYYY-MM-DD format"
        required: true
      end:
        description: "Collect repositories created up to, in YYYY-MM-DD format"
        required: false
        # Quoted so the date stays a string instead of being parsed as a date.
        default: "2024-05-01"
      out:
        description: "Output directory"
        required: true
      clean:
        description: "Clean branch before uploading files"
        required: true
        type: choice
        # Choice options are strings; quote them so YAML does not turn them
        # into booleans before the workflow parser sees them.
        default: "false"
        options:
          - "false"
          - "true"
      numbase:
        description: "CSV file with numerical features, considered as base for other datasets"
        required: true
# Token permissions for every job in this workflow.
# NOTE(review): `write-all` is the broadest setting. The steps visible in this
# file push to the gh-pages branch and pass GITHUB_TOKEN into the container as
# GH_TOKEN; presumably `contents: write` would be enough — confirm what the
# container does with GH_TOKEN before narrowing.
permissions: write-all
jobs:
  # Runs the sr-detection container to build the datasets into ./output and
  # then publishes that folder to the gh-pages branch.
  create:
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - uses: abatilo/actions-poetry@v3
        with:
          poetry-version: "1.8.3"
      - uses: extractions/setup-just@v2
        with:
          just-version: "1.30.1"
      - name: Create
        # Inputs and secrets are passed through `env` rather than being
        # expanded inside the script text: a value containing quotes, `$(...)`
        # or backticks would otherwise be interpreted by the shell.
        env:
          START: ${{ inputs.start }}
          END: ${{ inputs.end }}
          OUT: ${{ inputs.out }}
          STEPS: ${{ inputs.steps }}
          NUMBASE: ${{ inputs.numbase }}
          COLLECT_TOKEN: ${{ secrets.COLLECT_TOKEN_1 }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          COHERE_TOKEN: ${{ secrets.COHERE_TOKEN }}
        # ./output on the runner is mounted at /collection in the container;
        # the same folder is what the deploy step below publishes.
        # NOTE(review): the image is referenced without a tag, so the default
        # tag is pulled — consider pinning a version or digest.
        run: |
          docker run --rm -v "$(pwd)/output:/collection" \
            -e QUERY="stars:>10 language:java,rust size:>=20 mirror:false template:false NOT android" \
            -e START="${START}" -e END="${END}" \
            -e COLLECT_TOKEN="${COLLECT_TOKEN}" \
            -e GH_TOKEN="${GH_TOKEN}" \
            -e HF_TOKEN="${HF_TOKEN}" \
            -e COHERE_TOKEN="${COHERE_TOKEN}" \
            -e OUT="${OUT}" -e STEPS="${STEPS}" \
            -e NUMBASE="${NUMBASE}" -e EMBEDDINGS=false \
            h1alexbel/sr-detection
      - uses: JamesIves/github-pages-deploy-action@v4.7.1
        # Publish only when the run was dispatched from the master branch.
        if: github.ref == 'refs/heads/master'
        with:
          branch: gh-pages
          folder: output
          clean: ${{ inputs.clean }}