Skip to content

Commit

Permalink
Move rewriting stuff from warc2zim to zimscraperlib
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Oct 18, 2024
1 parent 8b6cbac commit 39ab439
Show file tree
Hide file tree
Showing 43 changed files with 10,056 additions and 78 deletions.
134 changes: 127 additions & 7 deletions .github/workflows/Publish.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,101 @@
# Release workflow: runs when a GitHub release is published.
# NOTE(review): this span looks like a rendered diff, so the removed and added
# lines appear together (two `name:` lines, and the former `publish:` job
# header on ubuntu-22.04) - confirm against the real workflow file.
name: Build and upload to PyPI
name: Build and publish to PyPI / NPM

on:
release:
types: [published]

jobs:
publish:
runs-on: ubuntu-22.04
generate-rules:
  # Runs rules/generate_rules.py, then uploads the four listed rule files as
  # the "rules" artifact (kept for one day) so that later jobs can restore them.
  runs-on: ubuntu-24.04

  steps:
    - name: Checkout repo
      uses: actions/checkout@v4

    # The Python version is read from pyproject.toml.
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        architecture: x64
        python-version-file: pyproject.toml

    - name: Install dependencies (and project)
      run: |
        pip install -U pip
        pip install -e .[scripts]

    - name: Generate rules
      run: |
        python rules/generate_rules.py

    - name: Save rules artifact
      uses: actions/upload-artifact@v4
      with:
        name: rules
        retention-days: 1
        path: |
          src/zimscraperlib/rewriting/rules.py
          tests/rewriting/test_fuzzy_rules.py
          javascript/src/fuzzyRules.js
          javascript/test/fuzzyRules.js

build-js:
  # Restores the "rules" artifact, builds the production JS with yarn and
  # uploads javascript/dist/wombatSetup.js as the "wombat-setup" artifact.
  needs: generate-rules
  runs-on: ubuntu-24.04

  steps:
    - name: Checkout repo
      uses: actions/checkout@v4

    - name: Restore rules artifact
      uses: actions/download-artifact@v4
      with:
        name: rules

    # The Node version is read from javascript/package.json.
    - name: Setup Node.JS
      uses: actions/setup-node@v4
      with:
        node-version-file: javascript/package.json

    - name: Install JS dependencies
      working-directory: javascript
      run: yarn install

    - name: Build production JS
      working-directory: javascript
      run: yarn build-prod

    - name: Save wombat-setup artifact
      uses: actions/upload-artifact@v4
      with:
        name: wombat-setup
        retention-days: 1
        path: javascript/dist/wombatSetup.js

# Publishing job: waits for generate-rules and build-js, then restores the
# "rules" artifact into the checked-out tree before the package is built.
# NOTE(review): the doubled `id-token: write` line and the
# `checkout@v3` / `checkout@v4` pair look like old/new diff lines rendered
# together - confirm; a genuinely duplicated key would be invalid YAML.
publish-python:
runs-on: ubuntu-24.04
needs:
- generate-rules # to have proper Python rules files (src and tests)
- build-js # to have proper wombatSetup.js (needs to be included in sdist)
permissions:
id-token: write # mandatory for PyPI trusted publishing
id-token: write # mandatory for PyPI trusted publishing

steps:
- uses: actions/checkout@v3
- name: Checkout repo
uses: actions/checkout@v4

# No `path` is given, so the artifact is restored at the action's default location.
- name: Restore rules artifact
uses: actions/download-artifact@v4
with:
name: rules
- name: Restore wombat-setup artifact
  uses: actions/download-artifact@v4
  with:
    name: wombat-setup
    # `path` is the destination DIRECTORY the artifact is extracted into, not
    # a target file name. The artifact holds a single wombatSetup.js, so
    # pointing at the statics directory yields statics/wombatSetup.js instead
    # of a statics/wombatSetup.js/ directory containing the file.
    path: src/zimscraperlib/rewriting/statics

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version-file: pyproject.toml
architecture: x64
Expand All @@ -24,5 +105,44 @@ jobs:
pip install -U pip build
python -m build --sdist --wheel
- name: Upload to PyPI
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1.8
# OPTIONAL PUBLICATION TO NPM, NOT NEEDED BY SCRAPERS IN THE END

# publish-js:
# runs-on: ubuntu-24.04
# needs:
# - generate-rules

# steps:
# - name: Checkout repo
# uses: actions/checkout@v4

# - name: Restore rules artifact
# uses: actions/download-artifact@v4
# with:
# name: rules

# - name: Setup Node.JS
# uses: actions/setup-node@v4
# with:
# node-version-file: 'javascript/package.json'
# registry-url: 'https://registry.npmjs.org' # Setup .npmrc file to publish to npm

# - name: Install JS dependencies
# run: yarn install
# working-directory: javascript

# - name: Build production JS
# run: yarn build-prod
# working-directory: javascript

# - name: Build JS package
# run: yarn pack
# working-directory: javascript

# - name: Publish to NPM
# run: npm publish $(ls *.tgz) --provenance --access public
# working-directory: javascript
# env:
# NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
47 changes: 47 additions & 0 deletions .github/workflows/PublishDev.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Builds wombatSetup.js and uploads it to the dev drive on every push to main.
name: Publish dev wombat-setup

on:
  push:
    branches:
      - main

jobs:
  publish-dev-wombat-setup:
    runs-on: ubuntu-24.04

    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      # The Python version is read from pyproject.toml.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: pyproject.toml
          architecture: x64

      - name: Install dependencies (and project)
        run: |
          pip install -U pip
          pip install -e .[scripts]

      # Runs before the JS build, in the same job, so no artifact hand-off is needed.
      - name: Generate rules
        run: |
          python rules/generate_rules.py

      # NOTE(review): registry-url is presumably only relevant when publishing
      # to npm, which this workflow does not appear to do - confirm whether it
      # can be dropped.
      - name: Setup Node.JS
        uses: actions/setup-node@v4
        with:
          node-version-file: 'javascript/package.json'
          registry-url: 'https://registry.npmjs.org'

      - name: Install JS dependencies
        run: yarn install
        working-directory: javascript

      - name: Build production JS
        run: yarn build-prod
        working-directory: javascript

      # The credentials are passed through the environment rather than being
      # templated into the script text: a secret containing quotes, `$` or
      # backticks would otherwise be re-interpreted by the shell.
      # -f makes curl fail on HTTP errors; -sw prints the HTTP status code.
      - name: Upload wombatSetup.js to dev drive
        env:
          DEV_DRIVE_WEBDAV_CREDENTIALS: ${{ secrets.DEV_DRIVE_WEBDAV_CREDENTIALS }}
        run: |
          curl -f -u "$DEV_DRIVE_WEBDAV_CREDENTIALS" -T javascript/dist/wombatSetup.js -sw '%{http_code}' "https://dev.kiwix.org/zimscraperlib/"
78 changes: 74 additions & 4 deletions .github/workflows/QA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,54 @@ on:
- main

jobs:
# QA counterpart of the rule generation job: runs rules/generate_rules.py and
# uploads the four listed rule files as the "rules" artifact (kept one day)
# for the jobs that declare `needs: generate-rules`.
# NOTE(review): this span looks like a rendered diff - `check-qa:` with
# ubuntu-22.04, `checkout@v3` and `setup-python@v4` appear to be the removed
# lines shown next to their replacements; confirm against the real file.
check-qa:
runs-on: ubuntu-22.04
generate-rules:
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v3
- name: Checkout repo
uses: actions/checkout@v4

# The Python version is read from pyproject.toml.
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version-file: pyproject.toml
architecture: x64

- name: Install dependencies (and project)
run: |
pip install -U pip
pip install -e .[scripts]
- name: Generate rules
run: |
python rules/generate_rules.py
- name: Save rules artifact
uses: actions/upload-artifact@v4
with:
path: |
src/zimscraperlib/rewriting/rules.py
tests/rewriting/test_fuzzy_rules.py
javascript/src/fuzzyRules.js
javascript/test/fuzzyRules.js
name: rules
retention-days: 1

check-python-qa:
runs-on: ubuntu-24.04
needs: generate-rules

steps:
- name: Checkout repo
uses: actions/checkout@v4

- name: Restore rules artifact
uses: actions/download-artifact@v4
with:
name: rules

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: pyproject.toml
architecture: x64
Expand All @@ -32,3 +72,33 @@ jobs:

- name: Check pyright
run: inv check-pyright

check-javascript-qa:
  # Restores the "rules" artifact, installs the JS dependencies and runs the
  # prettier and eslint checks from the javascript directory.
  needs: generate-rules
  runs-on: ubuntu-24.04

  steps:
    - name: Checkout repo
      uses: actions/checkout@v4

    - name: Restore rules artifact
      uses: actions/download-artifact@v4
      with:
        name: rules

    # The Node version is read from javascript/package.json.
    - name: Setup Node.JS
      uses: actions/setup-node@v4
      with:
        node-version-file: javascript/package.json

    - name: Install JS dependencies
      run: yarn install
      working-directory: javascript

    - name: Check prettier formatting
      run: yarn prettier-check
      working-directory: javascript

    - name: Check eslint rules
      run: yarn eslint
      working-directory: javascript
Loading

0 comments on commit 39ab439

Please sign in to comment.