Skip to content

Commit

Permalink
feat: pis stage for the platform orchestration dag (#14)
Browse files: browse the repository at this point in the history
* feat: platform pis stage first prototype (#11)

* fix: remove duplicate operator

* fix: simplify pis dag

* fix: use correct service account

* style: ignore init methods for docs

* fix: ensure error logs even if run fails

* fix: add jinja filter to common kwargs

* fix: fetch more logs per request

* fix: increase ram for pis

* feat: load configuration from orchestrator

* fix: add timeout

* feat: gce dockerized workload operator

* fix: empty log pager bug and old timestamps for exit code

* feat: wip conditional run for pis steps

* fix: export some more stuff from utils

* chore: add some exclusions to pydocstyle

* fix: remove interrogate and pydoclint

* feat: conditional run for pis steps

* fix: do not add merge tags to docker compose

* fix: apply suggestions from code review

Co-authored-by: Szymon Szyszkowski <ss60@mib117351s.internal.sanger.ac.uk>

* fix: drop poetry lock hook

---------

Co-authored-by: Szymon Szyszkowski <ss60@mib117351s.internal.sanger.ac.uk>
  • Loading branch information
javfg and Szymon Szyszkowski authored Sep 9, 2024
1 parent 9247f05 commit 85109be
Show file tree
Hide file tree
Showing 16 changed files with 1,130 additions and 305 deletions.
30 changes: 6 additions & 24 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
default_language_version:
python: python3.10
ci:
autoupdate_commit_msg: "chore: pre-commit autoupdate"
autofix_commit_msg: "chore: pre-commit auto fixes [...]"
autoupdate_commit_msg: 'chore: pre-commit autoupdate'
autofix_commit_msg: 'chore: pre-commit auto fixes [...]'
skip: [poetry-lock]
repos:
- repo: local
Expand Down Expand Up @@ -35,13 +35,13 @@ repos:
rev: v1.35.1
hooks:
- id: yamllint
args: ["-d", "{rules: {line-length: {max: 200}}}"]
args: ['-d', '{rules: {line-length: {max: 200}}}']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
exclude: "CHANGELOG.md"
exclude: 'CHANGELOG.md'
- id: debug-statements
- id: check-merge-conflict
- id: check-case-conflict
Expand All @@ -67,29 +67,11 @@ repos:
- id: pycln
args: [--all]
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.16.0
rev: v9.17.0
hooks:
- id: commitlint
additional_dependencies: ["@commitlint/config-conventional@18.6.3"]
additional_dependencies: ['@commitlint/config-conventional@18.6.3']
stages: [commit-msg]
# Check for docstring coverage in python files.
- repo: https://github.com/econchick/interrogate
rev: 1.7.0
hooks:
- id: interrogate
args: [-vv, --style, google, --ignore-magic, --ignore-setters, --ignore-private]
# https://www.pydocstyle.org/en/stable/error_codes.html
- repo: https://github.com/jsh9/pydoclint
rev: 0.4.1
hooks:
- id: pydoclint
args: [--style=google, --show-filenames-in-every-violation-message=true]
- repo: https://github.com/python-poetry/poetry
rev: "1.8.3"
hooks:
- id: poetry-check
- id: poetry-lock
args: ["--no-update"]
- repo: https://github.com/lovesegfault/beautysh
rev: v6.2.1
hooks:
Expand Down
32 changes: 24 additions & 8 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ services:
start_period: 5s
restart: always
airflow-webserver:
!!merge <<: *airflow-common
<<: *airflow-common
command: webserver
ports:
- "8080:8080"
Expand All @@ -111,11 +111,11 @@ services:
start_period: 30s
restart: always
depends_on:
!!merge <<: *airflow-common-depends-on
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-scheduler:
!!merge <<: *airflow-common
<<: *airflow-common
command: scheduler
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
Expand All @@ -125,11 +125,11 @@ services:
start_period: 30s
restart: always
depends_on:
!!merge <<: *airflow-common-depends-on
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully
airflow-init:
!!merge <<: *airflow-common
<<: *airflow-common
entrypoint: /bin/bash
# yamllint disable rule:line-length
command:
Expand Down Expand Up @@ -182,7 +182,7 @@ services:
exec /entrypoint airflow version
# yamllint enable rule:line-length
environment:
!!merge <<: *airflow-common-env
<<: *airflow-common-env
_AIRFLOW_DB_MIGRATE: "true"
_AIRFLOW_WWW_USER_CREATE: "true"
_AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow}
Expand All @@ -192,16 +192,32 @@ services:
volumes:
- ${AIRFLOW_PROJ_DIR:-.}:/sources
airflow-cli:
!!merge <<: *airflow-common
<<: *airflow-common
profiles:
- debug
environment:
!!merge <<: *airflow-common-env
<<: *airflow-common-env
CONNECTION_CHECK_MAX_COUNT: "0"
# Workaround for entrypoint issue. See: https://github.com/apache/airflow/issues/16252
command:
- bash
- -c
- airflow

airflow-triggerer:
<<: *airflow-common
command: triggerer
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow-init:
condition: service_completed_successfully

volumes:
postgres-db-volume:
115 changes: 36 additions & 79 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ apache-airflow = { extras = [
"postgres",
"sftp",
], version = "^2.9.2" }
apache-airflow-providers-google = "10.22.0"
apache-airflow-providers-google = "^10.22.0"
pyyaml = "^6.0.1"
google = "^3.0.0"
pendulum = "^3.0.0"
Expand Down Expand Up @@ -65,7 +65,6 @@ plugins = ["returns.contrib.mypy.returns_plugin"]
module = ["google.cloud.storage", "yaml"]
ignore_missing_imports = true


[tool.ruff.lint]
select = ["D", "I", "E"]
ignore = [
Expand All @@ -85,9 +84,12 @@ ignore = [
"D101", # Missing docstring in public class
]


[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.lint.pydocstyle]
convention = "google"
ignore-decorators = [
"airflow.decorators.task", # Don't require docstrings for Airflow tasks
"airflow.decorators.task_group.task_group", # ^ for airflow task groups
]
30 changes: 15 additions & 15 deletions src/ot_orchestration/dags/config/pis.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
work_dir: ./work
gcs_url: gs://open-targets-pre-data-releases/24.06dev-test/input
log_level: TRACE
work_dir: /mnt/disks/work
gcs_url: gs://open-targets-pre-data-releases/24.06dev-test-fullrun/input
log_level: INFO
force: no

scratchpad:
efo_release_version: v3.65.0
ensembl_release_version: '111'
efo_release_version: v3.68.0
ensembl_release_version: '112'
chembl_release_version: '34'
release: '24.06'

steps:
baseline_expression:
Expand Down Expand Up @@ -237,7 +236,7 @@ steps:
source: https://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/id_mapping/database_mappings/ensembl.tsv
destination: interactions-inputs/rna_central_ensembl.tsv
- name: download_latest string interactions
source: otar001-core/stringInteractions
source: gs://otar001-core/stringInteractions
destination: interactions-inputs/string-interactions.txt.gz

literature:
Expand All @@ -247,7 +246,7 @@ steps:

mouse_phenotypes:
- name: download_latest mouse phenotypes
source: otar001-core/MousePhenotypes
source: gs://otar001-core/MousePhenotypes
destination: mouse-phenotypes-inputs/mouse_phenotypes.json.gz

openfda:
Expand All @@ -260,6 +259,7 @@ steps:
source: https://api.fda.gov/download.json
destination: fda-inputs/fda_events.json
json_path: .results.drug.event.partitions[].file
prefix: https://download.open.fda.gov/drug/event/
do:
- name: download fda events ${destination}
source: ${source}
Expand All @@ -275,18 +275,18 @@ steps:

pharmacogenomics:
- name: download_latest pharmacogenomics
source: otar012-eva/pharmacogenomics
source: gs://otar001-core/Pharmacogenetics/json
destination: pharmacogenomics-inputs/pharmacogenomics.json.gz

ppp_evidence:
- name: download_latest validation lab
source: otar013-ppp/validation_lab
- name: download_latest validation_lab
source: gs://otar013-ppp/validation_lab
destination: evidence-files/validation_lab.json.gz
- name: download_latest encore
source: otar013-ppp/encore
source: gs://otar013-ppp/encore
destination: evidence-files/encore.json.gz
- name: download_latest ot_crispr
source: otar013-ppp/ot_crispr
source: gs://otar013-ppp/ot_crispr
destination: evidence-files/ot_crispr.json.gz

reactome:
Expand Down Expand Up @@ -370,15 +370,15 @@ steps:
source: gs://otar001-core/TargetSafety/json
destination: target-inputs/safety/safetyLiabilities.json.gz
- name: download_latest tractability
source: gs://otar001-core/Tractability/${release}
source: gs://otar001-core/Tractability
destination: target-inputs/tractability/tractability.tsv
- name: get_file_list cosmic
source: gs://otar007-cosmic
pattern: 'hallmarks'
sentinel: cosmic_file_list
- name: download_latest cosmic
source: ${cosmic_file_list}
destination: evidence-files/cosmic.json.gz
destination: evidence-files/cosmic-hallmarks.json.gz

target_engine:
- name: download protein atlas
Expand Down
Loading

0 comments on commit 85109be

Please sign in to comment.