Skip to content

Merge pull request #58 from swerik-project/qes-pydoc #125

Merge pull request #58 from swerik-project/qes-pydoc

Merge pull request #58 from swerik-project/qes-pydoc #125

Workflow file for this run

# Data integrity checks for the Riksdagen records corpus.
#
# Triggered on every push. Each job is independent: it checks out the
# repository, installs Python plus the packages that job needs, and then runs
# a single `test.*` module through `python -m unittest`.
#
# The one-entry version matrix is kept on every job so that the reported check
# names (e.g. "schemas (3.8)") stay stable. The version is quoted so YAML reads
# it as a string rather than a float.
name: "Riksdagen Records: data integrity tests"
on: [push]
jobs:
  # Runs test.schemas (XML validation against the ParlaClarin schema).
  schemas:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Validate XML files against ParlaClarin schema
        run: |
          python -m unittest test.schemas

  # Runs test.altocheck (comparison of the records with the OCR result).
  # The only job that receives the KBLAB credentials from repository secrets.
  alto-comparison:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
          pip install alto-xml
      - name: Make sure the records do not differ too much from the OCR result
        env:  # authentication
          KBLAB_USERNAME: ${{ secrets.KBLAB_USERNAME }}
          KBLAB_PASSWORD: ${{ secrets.KBLAB_PASSWORD }}
        run: |
          python -m unittest test.altocheck

  # Runs test.empty-speech (no empty `u` or `seg` elements).
  empty-speech:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Test there are no empty u or seg elements
        run: |
          python -m unittest test.empty-speech

  # Runs test.mp (speaker data integrity).
  # Clones the riksdagen-persons repository at `main` into the workspace and
  # exposes its `data` directory to the test through METADATA_PATH.
  mp:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Download metadata
        run: |
          git clone https://github.com/swerik-project/riksdagen-persons.git
          cd riksdagen-persons && git checkout main && cd ../
      - name: Test speaker data integrity
        run: |
          export METADATA_PATH="riksdagen-persons/data"
          python -m unittest test.mp

  # Runs test.next_prev (coherence of next/prev tagging).
  next-prev:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Check that next/prev tagging is coherent
        run: |
          python -m unittest test.next_prev

  # Runs test.paragraph_has_id (every element with text carries an ID).
  paragraph-ids:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install PyPDF2
      - name: Check that all elements with text have IDs
        run: |
          python -m unittest test.paragraph_has_id

  # Runs test.records-in-sequence (protocol numbers have no gaps or duplicates).
  records-in-sequence:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyriksdagen
          pip install pytest-cfg-fetcher
      - name: Check that there are no duplicate or missing protocol numbers between first and last
        run: |
          python -m unittest test.records-in-sequence