# Skip to content
# This repository has been archived by the owner on May 6, 2024. It is now read-only.
#
# Run tests scrapers #28915
#
# Run tests scrapers
#
# Run tests scrapers #28915

# Scheduled workflow: runs the tests scraper once per enabled canton (one
# matrix job each) and commits any resulting changes to the master branch.
# A Slack message is sent when a job fails or is cancelled.
name: Run tests scrapers
on:
  schedule:
    - cron: '20 * * * *'  # run every hour at xx:20
  # No inputs are defined; this trigger only enables manual runs.
  workflow_dispatch: null
jobs:
  run_scraper:
    # NOTE(review): ubuntu-20.04 is an old runner label - confirm it is still
    # offered by GitHub before re-enabling this workflow.
    runs-on: ubuntu-20.04
    continue-on-error: false
    timeout-minutes: 10
    strategy:
      # A failing canton must not cancel the other cantons' jobs.
      fail-fast: false
      matrix:
        # Commented-out cantons are currently disabled.
        canton:
          # - AG
          - BE
          - BL
          - BS
          - FL
          # - FR  # no more data published anymore
          # - GE
          - GL
          # - JU  # disabled until PDF is fixed
          # - NW
          - SG
          - SH
          # - SO
          - TG
          # - TI  # no more data published anymore
          # - VD
          - VS
          - ZG
          - ZH
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version as a string, not a float.
          python-version: '3.7'

      - run: npm ci

      - name: Remove broken apt repos
        # Deletes every apt source file that mentions "microsoft".
        run: |
          for apt_file in $(grep -lr microsoft /etc/apt/sources.list.d/); do
            sudo rm "$apt_file"
          done

      - name: Install dependencies
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
        run: |
          python -m pip install --upgrade pip setuptools wheel
          pip install -r requirements.txt
          sudo apt-get update || true  # do not fail if update does not work
          # -y: never wait for a confirmation prompt in a non-interactive job.
          sudo apt-get install -y sqlite3 poppler-utils
          # Chromium is only installed for the GE scraper.
          if [ "$SCRAPER_KEY" = "GE" ]; then
            sudo apt-get install -y chromium-browser
          fi

      - name: Scrape new data
        env:
          # Presumably read by the script to select the canton - verify
          # against scrapers/run_tests_scraper.sh.
          SCRAPER_KEY: ${{ matrix.canton }}
        run: |
          ./scrapers/run_tests_scraper.sh

      - name: Check if there are changes in the repo
        id: changes
        # Sets output "changed" to 0 when `git diff` (ignoring whitespace)
        # reports no differences, and to 1 otherwise.
        run: |
          if git diff -w --no-ext-diff --quiet
          then
            echo "changed=0" >> "$GITHUB_OUTPUT"
          else
            echo "changed=1" >> "$GITHUB_OUTPUT"
          fi

      - name: Set commit message
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
        # FL uses a file name without the "kanton_" part.
        run: |
          if [ "$SCRAPER_KEY" = "FL" ]; then
            echo "commit_msg=Update fallzahlen_${SCRAPER_KEY}_tests.csv from scraper" >> "$GITHUB_ENV"
          else
            echo "commit_msg=Update fallzahlen_kanton_${SCRAPER_KEY}_tests.csv from scraper" >> "$GITHUB_ENV"
          fi

      - name: Commit and push to repo
        if: steps.changes.outputs.changed == 1  # only try to commit if there are actually changes
        uses: github-actions-x/commit@v2.9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          push-branch: master
          name: GitHub Action Scraper
          email: scraper@open.zh.ch
          commit-message: ${{ env.commit_msg }}
          rebase: 'true'

      - name: Get current unix timestamp
        # Runs even after a failure so the Slack step below can use the value.
        if: always()
        id: date
        run: echo "ts=$(date +'%s')" >> "$GITHUB_OUTPUT"

      - name: Notify slack failure
        if: ${{ failure() || cancelled() }}
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
        # NOTE(review): "@master" is a mutable ref; consider pinning this
        # third-party action to a release tag or commit SHA.
        uses: pullreminders/slack-action@master
        with:
          # Single-quoted on purpose: the backslashes before the double quotes
          # are passed to the action literally.
          args: '{\"channel\":\"C013C0UUQ4S\", \"attachments\": [{\"fallback\": \"Job failed.\", \"color\": \"danger\", \"title\": \"Run tests scrapers ${{ matrix.canton }}\", \"title_link\": \"https://github.com/openZH/covid_19/actions/runs/${{ github.run_id }}?check_suite_focus=true\", \"text\": \":x: Tests scraper failed\", \"footer\": \"<https://github.com/openZH/covid_19|openZH/covid_19>\", \"footer_icon\": \"https://github.com/abinoda/slack-action/raw/master/docs/app-icons/github-icon.png\", \"ts\": \"${{steps.date.outputs.ts}}\"}]}'