From 6d666f8b8b0603e1cec464f2987bc1470392f74c Mon Sep 17 00:00:00 2001
From: Oumaima Hamza
Date: Mon, 1 Apr 2024 22:17:04 -0400
Subject: [PATCH 1/2] Set up Github Actions to update gene list

---
 .../workflows/updating_oncokb_database.yml    | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 .github/workflows/updating_oncokb_database.yml

diff --git a/.github/workflows/updating_oncokb_database.yml b/.github/workflows/updating_oncokb_database.yml
new file mode 100644
index 000000000..059898b54
--- /dev/null
+++ b/.github/workflows/updating_oncokb_database.yml
@@ -0,0 +1,52 @@
+name: update oncokb curated gene List
+
+on:
+  schedule:
+    # Runs at 00:00 on the 1st day of every second month
+    - cron: '0 0 1 */2 *'
+  workflow_dispatch: {}
+  #run manually
+
+jobs:
+  update-gene-list:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: checkout repo
+      uses: actions/checkout@v4
+      with:
+        ref: release-1.6.0
+
+    - name: set up python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.10'
+
+    - name: installing dependencies
+      run:
+        pip install pandas
+
+    - name: fetch and update gene list
+      env:
+        TOKEN: ${{ secrets.ONCOKB_TOKEN }}
+      run: |
+        # download the database
+        curl -H "Authorization: Bearer $TOKEN" https://www.oncokb.org/api/v1/utils/allCuratedGenes > allCuratedGenes.json
+
+        # convert json to tsv
+        python -c "import pandas as pd; import datetime; df = pd.read_json('allCuratedGenes.json'); date_str = datetime.datetime.now().strftime('%Y%m%d'); df.to_csv(f'djerba/src/lib/djerba/data/{date_str}_allcuratedgenelist.tsv', sep='\t', index=False)"
+
+        # delete json file
+        rm allCuratedGenes.json
+
+        # replace filename in the constants.py script
+        sed -i "s|ALL_CURATED_GENES = .*|ALL_CURATED_GENES = '{date_str}_allcuratedgenelist.tsv'|g" djerba/src/lib/djerba/util/oncoKB/constants.py
+
+    - name: create pull request
+      uses: peter-evans/create-pull-request@v6
+      with:
+        commit-message: "Update curated gene list"
+        branch: OncoKb_database_update
+        base: release-1.6.0
+        title: "Update curated gene list"
+        body: "This is an automated PR made by Github Actions to update OncoKb list of curated genes."

From c3934e5471592640aebe300a23a3735685cb1ca3 Mon Sep 17 00:00:00 2001
From: Oumaima Hamza
Date: Tue, 2 Apr 2024 23:24:00 -0400
Subject: [PATCH 2/2] using awk to replace filename

Paths in the run step are relative to the repository root, so the
leading "djerba/" component is dropped from both the TSV output path
and the constants.py path.

The date stamp is computed inside the `python -c` process, so no shell
variable named date_str exists when the awk step runs. The stamp is
therefore produced with `date +%Y%m%d` directly in the awk invocation,
which is the same format as the strftime('%Y%m%d') call that names the
TSV file.

The replacement value is written with single quotes because the awk
pattern matches a single-quoted value (\047 is the octal escape for ');
writing double quotes would stop the pattern from matching on the next
scheduled run.

---
 .github/workflows/updating_oncokb_database.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/updating_oncokb_database.yml b/.github/workflows/updating_oncokb_database.yml
index 059898b54..dbc289899 100644
--- a/.github/workflows/updating_oncokb_database.yml
+++ b/.github/workflows/updating_oncokb_database.yml
@@ -34,13 +34,13 @@ jobs:
         curl -H "Authorization: Bearer $TOKEN" https://www.oncokb.org/api/v1/utils/allCuratedGenes > allCuratedGenes.json
 
         # convert json to tsv
-        python -c "import pandas as pd; import datetime; df = pd.read_json('allCuratedGenes.json'); date_str = datetime.datetime.now().strftime('%Y%m%d'); df.to_csv(f'djerba/src/lib/djerba/data/{date_str}_allcuratedgenelist.tsv', sep='\t', index=False)"
+        python -c "import pandas as pd; import datetime; df = pd.read_json('allCuratedGenes.json'); date_str = datetime.datetime.now().strftime('%Y%m%d'); df.to_csv(f'src/lib/djerba/data/{date_str}_allcuratedgenelist.tsv', sep='\t', index=False)"
 
         # delete json file
         rm allCuratedGenes.json
 
         # replace filename in the constants.py script
-        sed -i "s|ALL_CURATED_GENES = .*|ALL_CURATED_GENES = '{date_str}_allcuratedgenelist.tsv'|g" djerba/src/lib/djerba/util/oncoKB/constants.py
+        awk -v new_val="ALL_CURATED_GENES = '$(date +%Y%m%d)_allcuratedgenelist.tsv'" '{gsub(/ALL_CURATED_GENES = \047.*\047/, new_val)}1' src/lib/djerba/util/oncoKB/constants.py > temp && mv temp src/lib/djerba/util/oncoKB/constants.py
 
     - name: create pull request
       uses: peter-evans/create-pull-request@v6