# Merge pull request #58 from swerik-project/qes-pydoc #125
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: "Riksdagen Records: data integrity tests"
# Trigger: run all jobs on every push event.
on: [push]
# Each job sets up Python, installs its dependencies with pip, and runs a
# single `test.*` module through `python -m unittest`.
jobs:
  # Validates the XML files against the ParlaClarin schema.
  schemas:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the version is read as a string rather than a float.
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Validate XML files against ParlaClarin schema
        run: |
          python -m unittest test.schemas

  # Compares the records with the OCR result; needs KBLAB credentials.
  alto-comparison:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
          pip install alto-xml
      - name: Make sure the records do not differ too much from the OCR result
        env:  # authentication
          KBLAB_USERNAME: ${{ secrets.KBLAB_USERNAME }}
          KBLAB_PASSWORD: ${{ secrets.KBLAB_PASSWORD }}
        run: |
          python -m unittest test.altocheck

  # Checks that no `u` or `seg` elements are empty.
  empty-speech:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Test there are no empty u or seg elements
        run: |
          python -m unittest test.empty-speech

  # Checks speaker data against the riksdagen-persons metadata repository.
  mp:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Download metadata
        # Clones straight onto `main` instead of cloning and then checking out.
        run: |
          git clone --branch main https://github.com/swerik-project/riksdagen-persons.git
      - name: Test speaker data integrity
        env:
          # Relative to the workspace, where the metadata repo was cloned.
          METADATA_PATH: riksdagen-persons/data
        run: |
          python -m unittest test.mp

  # Checks that next/prev tagging is coherent.
  next-prev:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Check that next/prev tagging is coherent
        run: |
          python -m unittest test.next_prev

  # Checks that every element with text has an ID.
  paragraph-ids:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Check that all elements with text have IDs
        run: |
          python -m unittest test.paragraph_has_id

  # Checks protocol numbering for duplicates and gaps.
  records-in-sequence:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Check that there are no duplicate or missing protocol numbers between first and last
        run: |
          python -m unittest test.records-in-sequence