Commit 300ed23
Merge pull request #382 from OP-TED/release/1.0.0
Release/1.0.0
costezki authored Nov 20, 2022
2 parents 2ac149e + 71b9960 commit 300ed23
Showing 58 changed files with 11,449 additions and 257 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests-srv.yml
@@ -3,9 +3,9 @@ name: Tests on srv server
on:
  workflow_dispatch:
  push:
-    branches: [ feature/* ]
+    branches: [ feature/*, hotfix/* ]
  pull_request:
-    branches: [ master ]
+    branches: [ master, release/* ]
env:
  VAULT_TOKEN: ${{ secrets.VAULT_TOKEN }}
  VAULT_ADDR: ${{ secrets.VAULT_ADDR }}
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
@@ -2,9 +2,9 @@ name: Tests

on:
  push:
-    branches: [ feature/* , main ]
+    branches: [ feature/* , main, hotfix/* ]
  pull_request:
-    branches: [ main ]
+    branches: [ main, release/* ]

jobs:
  build:
7 changes: 4 additions & 3 deletions Makefile
@@ -24,12 +24,12 @@ CAROOT = $(shell pwd)/infra/traefik/certs
install:
	@ echo -e "$(BUILD_PRINT)Installing the requirements$(END_BUILD_PRINT)"
	@ pip install --upgrade pip
-	@ pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.1/constraints-no-providers-3.8.txt"
+	@ pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-no-providers-3.8.txt"

install-dev:
	@ echo -e "$(BUILD_PRINT)Installing the dev requirements$(END_BUILD_PRINT)"
	@ pip install --upgrade pip
-	@ pip install --no-cache-dir -r requirements.dev.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.1/constraints-no-providers-3.8.txt"
+	@ pip install --no-cache-dir -r requirements.dev.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-no-providers-3.8.txt"

test: test-unit

@@ -402,8 +402,9 @@ install-allure:
@ echo -e "Start install Allure commandline."
@ sudo apt -y install npm
@ sudo npm install -g allure-commandline
@ sudo pip install allure-combine

install-mkcert:
install-mkcert:
@ mkdir -p .ssl && cd .ssl && rm -rf *
@ curl -JLO "https://dl.filippo.io/mkcert/latest?for=linux/amd64"
@ chmod +x mkcert-v*-linux-amd64
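Side note on the added Makefile line: allure-combine post-processes a generated Allure report directory into a single self-contained HTML file, which is easier to archive or attach to CI results than the multi-file report.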
28 changes: 28 additions & 0 deletions dags/daily_check_notices_availability_in_cellar.py
@@ -0,0 +1,28 @@
from airflow.decorators import dag, task
from pymongo import MongoClient

from dags import DEFAULT_DAG_ARGUMENTS
from ted_sws import config
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_validator.services.check_availability_of_notice_in_cellar import \
    validate_notices_availability_in_cellar

DAG_NAME = "daily_check_notices_availability_in_cellar"


@dag(default_args=DEFAULT_DAG_ARGUMENTS,
     catchup=False,
     schedule_interval="0 0 * * *",
     tags=['daily', 'validation'])
def daily_check_notices_availability_in_cellar():
    @task
    def check_notices_availability_in_cellar():
        mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
        validate_notices_availability_in_cellar(notice_statuses=[NoticeStatus.PUBLISHED,
                                                                 NoticeStatus.PUBLICLY_UNAVAILABLE],
                                                mongodb_client=mongodb_client)

    check_notices_availability_in_cellar()


dag = daily_check_notices_availability_in_cellar()
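Reviewer note: the service this new DAG calls is only imported here. A minimal sketch of the availability check it plausibly performs — the HTTP HEAD probe below is an illustrative assumption, not the actual ted_sws implementation:

# Assumed behaviour, for illustration only; the real logic lives in
# ted_sws.notice_validator.services.check_availability_of_notice_in_cellar.
import requests


def is_notice_available_in_cellar(cellar_uri: str) -> bool:
    # A HEAD request is enough to confirm that the published resource resolves.
    response = requests.head(cellar_uri, allow_redirects=True, timeout=30)
    return response.ok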
2 changes: 1 addition & 1 deletion dags/daily_materialized_view_update.py
@@ -13,7 +13,7 @@

@dag(default_args=DEFAULT_DAG_ARGUMENTS,
     catchup=False,
-     schedule_interval="0 5 * * *",
+     schedule_interval="0 6 * * *",
     tags=['mongodb', 'daily-views-update'])
def daily_materialized_view_update():
    @task
2 changes: 1 addition & 1 deletion dags/load_notices_in_fuseki.py
@@ -31,7 +31,7 @@ def load_distilled_rdf_manifestations_in_fuseki():
    mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
    notice_repository = NoticeRepository(mongodb_client=mongodb_client)
    fuseki_repository = FusekiAdapter()
-    notices = notice_repository.get_notice_by_status(notice_status=NoticeStatus[notice_status])
+    notices = notice_repository.get_notices_by_status(notice_status=NoticeStatus[notice_status])
    for notice in notices:
        load_rdf_manifestation_into_triple_store(rdf_manifestation=notice.distilled_rdf_manifestation,
                                                 triple_store_repository=fuseki_repository,
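The rename from get_notice_by_status to get_notices_by_status matches the plural result the call site iterates over. As a side note, NoticeStatus[notice_status] resolves the string DAG parameter to an enum member by name; a reduced stand-in (the real enum has many more members):

from enum import Enum


class NoticeStatus(Enum):  # reduced stand-in for ted_sws.core.model.notice.NoticeStatus
    DISTILLED = 1
    PUBLISHED = 2


# Item lookup by member name is how the DAG turns its string parameter into an enum value:
assert NoticeStatus["DISTILLED"] is NoticeStatus.DISTILLED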
27 changes: 24 additions & 3 deletions dags/notice_fetch_by_date_workflow.py
@@ -1,6 +1,6 @@
from airflow.decorators import dag, task
from airflow.operators.dummy import DummyOperator
-from airflow.operators.python import BranchPythonOperator
+from airflow.operators.python import BranchPythonOperator, PythonOperator
from airflow.utils.trigger_rule import TriggerRule
from airflow.timetables.trigger import CronTriggerTimetable

@@ -20,11 +20,12 @@
TRIGGER_COMPLETE_WORKFLOW_TASK_ID = "trigger_complete_notice_proc_workflow"
CHECK_IF_TRIGGER_COMPLETE_WORKFLOW_TASK_ID = "check_if_trigger_complete_workflow"
FINISH_FETCH_BY_DATE_TASK_ID = "finish_fetch_by_date"
+VALIDATE_FETCHED_NOTICES_TASK_ID = "validate_fetched_notices"


@dag(default_args=DEFAULT_DAG_ARGUMENTS,
     catchup=False,
-     timetable=CronTriggerTimetable('0 3 * * *', timezone='UTC'),
+     timetable=CronTriggerTimetable('0 1 * * *', timezone='UTC'),
     tags=['selector', 'daily-fetch'])
def notice_fetch_by_date_workflow():
    @task
@@ -48,6 +49,20 @@ def fetch_by_date_notice_from_ted():
                                    batch_size=BATCH_SIZE,
                                    execute_only_one_step=True)

+    def validate_fetched_notices():
+        """
+        Validate the notices fetched for the publication day passed through the DAG params.
+        """
+        from ted_sws import config
+        from ted_sws.supra_notice_manager.services.supra_notice_validator import validate_and_update_daily_supra_notice
+        from datetime import datetime
+        from pymongo import MongoClient
+
+        publication_date = datetime.strptime(get_dag_param(key=WILD_CARD_DAG_KEY), "%Y%m%d*")
+        mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
+        validate_and_update_daily_supra_notice(notice_publication_day=publication_date,
+                                               mongodb_client=mongodb_client)

    def _branch_selector():
        trigger_complete_workflow = get_dag_param(key=TRIGGER_COMPLETE_WORKFLOW_DAG_KEY,
                                                  default_value=True)
@@ -61,11 +76,17 @@ def _branch_selector():
        python_callable=_branch_selector,
    )

+    validate_fetched_notices_step = PythonOperator(
+        task_id=VALIDATE_FETCHED_NOTICES_TASK_ID,
+        trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS,
+        python_callable=validate_fetched_notices
+    )

    finish_step = DummyOperator(task_id=FINISH_FETCH_BY_DATE_TASK_ID,
                                trigger_rule=TriggerRule.NONE_FAILED_MIN_ONE_SUCCESS)

    fetch_by_date_notice_from_ted() >> branch_task >> [trigger_normalisation_workflow,
-                                                      trigger_complete_workflow] >> finish_step
+                                                      trigger_complete_workflow] >> validate_fetched_notices_step >> finish_step


dag = notice_fetch_by_date_workflow()
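One detail in validate_fetched_notices worth flagging: the format string "%Y%m%d*" makes strptime match the trailing * literally, so the wildcard-suffixed date key the DAG passes around parses cleanly. A self-contained check, with an illustrative sample value:

from datetime import datetime

# "%Y%m%d*" parses the date digits and then matches the trailing "*" literally,
# so a wildcard-suffixed DAG parameter such as "20221120*" round-trips cleanly.
publication_date = datetime.strptime("20221120*", "%Y%m%d*")
assert publication_date == datetime(2022, 11, 20)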
89 changes: 0 additions & 89 deletions dags/notice_validation_workflow.py

This file was deleted.

9 changes: 7 additions & 2 deletions dags/operators/DagBatchPipelineOperator.py
@@ -17,7 +17,7 @@
NOTICE_IDS_KEY = "notice_ids"
START_WITH_STEP_NAME_KEY = "start_with_step_name"
EXECUTE_ONLY_ONE_STEP_KEY = "execute_only_one_step"
-DEFAULT_NUBER_OF_CELERY_WORKERS = 144 #TODO: revise this config
+DEFAULT_NUBER_OF_CELERY_WORKERS = 144  # TODO: revise this config
NOTICE_PROCESS_WORKFLOW_DAG_NAME = "notice_process_workflow"
DEFAULT_START_WITH_TASK_ID = "notice_normalisation_pipeline"
DEFAULT_PIPELINE_NAME_FOR_LOGS = "unknown_pipeline_name"
@@ -79,6 +79,7 @@ def execute(self, context: Any):
                self.batch_pipeline_callable(notice_ids=notice_ids, mongodb_client=mongodb_client))
        elif self.notice_pipeline_callable is not None:
            for notice_id in notice_ids:
+                notice = None
                try:
                    notice_event = NoticeEventMessage(notice_id=notice_id, domain_action=pipeline_name)
                    notice_event.start_record()
@@ -95,7 +96,11 @@
                    notice_event.notice_status = str(notice.status)
                    logger.info(event_message=notice_event)
                except Exception as e:
-                    log_notice_error(message=str(e), notice_id=notice_id, domain_action=pipeline_name)
+                    notice_normalised_metadata = notice.normalised_metadata if notice else None
+                    log_notice_error(message=str(e), notice_id=notice_id, domain_action=pipeline_name,
+                                     notice_form_number=notice_normalised_metadata.form_number if notice_normalised_metadata else None,
+                                     notice_status=notice.status if notice else None,
+                                     notice_eforms_subtype=notice_normalised_metadata.eforms_subtype if notice_normalised_metadata else None)

        batch_event_message.end_record()
        logger.info(event_message=batch_event_message)
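Pre-binding notice = None before the try block is what makes the enriched except handler safe: if the pipeline fails before notice is assigned, referencing it would otherwise raise a NameError of its own inside the handler. A minimal reproduction of the pattern with a stand-in logger — the keyword arguments mirror the diff, but the helper body and the sample notice id are assumptions:

# Stand-in for ted_sws.event_manager.services.log.log_notice_error, assumed to
# accept the keyword arguments the diff passes.
def log_notice_error(message, notice_id=None, domain_action=None, notice_form_number=None,
                     notice_status=None, notice_eforms_subtype=None):
    print(f"[{domain_action}] notice={notice_id} status={notice_status}: {message}")


notice = None  # pre-bind so the except block can safely test it
try:
    raise RuntimeError("step failed before the notice was loaded")
except Exception as e:
    metadata = notice.normalised_metadata if notice else None
    log_notice_error(message=str(e), notice_id="0-2022", domain_action="demo_pipeline",
                     notice_form_number=metadata.form_number if metadata else None,
                     notice_status=notice.status if notice else None,
                     notice_eforms_subtype=metadata.eforms_subtype if metadata else None)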
18 changes: 12 additions & 6 deletions dags/pipelines/notice_processor_pipelines.py
@@ -36,7 +36,9 @@ def notice_transformation_pipeline(notice: Notice, mongodb_client: MongoClient)
f"form_number=[{notice.normalised_metadata.form_number}],"
f" eform_subtype=[{notice.normalised_metadata.eforms_subtype}], "
f"xsd_version=[{notice.normalised_metadata.xsd_version}]. Check mapping suites!",
notice_id=notice.ted_id, domain_action=notice_transformation_pipeline.__name__)
notice_id=notice.ted_id, domain_action=notice_transformation_pipeline.__name__, notice_status=notice.status,
notice_form_number=notice.normalised_metadata.form_number,
notice_eforms_subtype=notice.normalised_metadata.eforms_subtype)
return NoticePipelineOutput(notice=notice, processed=False)
notice_id, mapping_suite_id = result
# TODO: Implement XML preprocessing
@@ -93,15 +95,19 @@ def notice_publish_pipeline(notice: Notice, mongodb_client: MongoClient) -> NoticePipelineOutput:
"""
"""
from ted_sws.notice_publisher.services.notice_publisher import publish_notice, publish_notice_rdf_into_s3
from ted_sws.notice_publisher.services.notice_publisher import publish_notice, publish_notice_rdf_into_s3, \
publish_notice_into_s3
from ted_sws.event_manager.services.log import log_notice_error
from ted_sws import config
notice.update_status_to(new_status=NoticeStatus.PACKAGED)
if config.S3_PUBLISH_ENABLED:
published_into_s3 = publish_notice_rdf_into_s3(notice=notice)
if not published_into_s3:
log_notice_error(message="Can't load notice distilled rdf manifestation into S3 bucket!",
notice_id=notice.ted_id)
published_rdf_into_s3 = publish_notice_rdf_into_s3(notice=notice)
publish_notice_into_s3 = publish_notice_into_s3(notice=notice)
if not (published_rdf_into_s3 and publish_notice_into_s3):
log_notice_error(message="Can't load notice distilled rdf manifestation and METS package into S3 bucket!",
notice_id=notice.ted_id, notice_status=notice.status,
notice_form_number=notice.normalised_metadata.form_number,
notice_eforms_subtype=notice.normalised_metadata.eforms_subtype)
notice.set_is_eligible_for_publishing(eligibility=True)
result = publish_notice(notice=notice)
if result:
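publish_notice_into_s3 is newly imported alongside the RDF helper, but its shape is not shown in this diff. A plausible sketch, purely as an assumption about the interface the pipeline relies on:

# Assumed signatures, for illustration only; the real implementations live in
# ted_sws.notice_publisher.services.notice_publisher.
def publish_notice_rdf_into_s3(notice) -> bool:
    """Upload the notice's distilled RDF manifestation to S3; True on success."""
    ...


def publish_notice_into_s3(notice) -> bool:
    """Upload the notice's METS package to S3; True on success."""
    ...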
4 changes: 2 additions & 2 deletions infra/airflow-cluster/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/apache/airflow:2.4.1-python3.8
+FROM docker.io/apache/airflow:2.4.3-python3.8

# quick sudo
USER root
@@ -26,4 +26,4 @@ RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.ja


RUN pip install --upgrade pip
-RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.1/constraints-no-providers-3.8.txt"
+RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-no-providers-3.8.txt"
4 changes: 2 additions & 2 deletions infra/airflow/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/apache/airflow:2.4.1-python3.8
+FROM docker.io/apache/airflow:2.4.3-python3.8

# quick sudo
USER root
@@ -25,4 +25,4 @@ RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.ja


RUN pip install --upgrade pip
-RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.1/constraints-no-providers-3.8.txt"
+RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.4.3/constraints-no-providers-3.8.txt"
2 changes: 1 addition & 1 deletion infra/aws/create-and-deploy-images.sh
@@ -1,6 +1,6 @@
REGION=eu-west-1
FUSEKI_IMAGE=docker.io/secoresearch/fuseki:4.5.0
-METABASE_IMAGE=docker.io/metabase/metabase:v0.43.4
+METABASE_IMAGE=docker.io/metabase/metabase:v0.44.6
MONGO_EXPRESS_IMAGE=docker.io/mongo-express:0.54.0
SFTP_IMAGE=docker.io/atmoz/sftp:debian

14 changes: 7 additions & 7 deletions infra/aws/metabase.yml
@@ -2,13 +2,13 @@ version: "3"
services:
  metabase:
    environment:
-      ENV_MB_DB_TYPE: postgres
-      ENV_MB_DB_DBNAME: ${ENV_MB_DB_DBNAME}
-      ENV_MB_DB_PORT: ${ENV_MB_DB_PORT}
-      ENV_MB_DB_USER: ${ENV_MB_DB_USER}
-      ENV_MB_DB_PASS: ${ENV_MB_DB_PASS}
-      ENV_MB_DB_HOST: ${ENV_MB_DB_HOST}
-      ENV_MB_ENCRYPTION_SECRET_KEY: ${ENV_MB_ENCRYPTION_SECRET_KEY}
+      MB_DB_TYPE: postgres
+      MB_DB_DBNAME: ${ENV_MB_DB_DBNAME}
+      MB_DB_PORT: ${ENV_MB_DB_PORT}
+      MB_DB_USER: ${ENV_MB_DB_USER}
+      MB_DB_PASS: ${ENV_MB_DB_PASS}
+      MB_DB_HOST: ${ENV_MB_DB_HOST}
+      MB_ENCRYPTION_SECRET_KEY: ${ENV_MB_ENCRYPTION_SECRET_KEY}
    image: ${METABASE_IMAGE_URI}
    logging:
      driver: awslogs
16 changes: 8 additions & 8 deletions infra/metabase/docker-compose.yml
@@ -19,15 +19,15 @@ services:
  metabase:
    container_name: metabase-${ENVIRONMENT}
    restart: unless-stopped
-    image: metabase/metabase:v0.43.4
+    image: metabase/metabase:v0.44.6
    environment:
-      - ENV_MB_DB_TYPE=postgres
-      - ENV_MB_DB_DBNAME=${ENV_MB_DB_DBNAME}
-      - ENV_MB_DB_PORT=${ENV_MB_DB_PORT}
-      - ENV_MB_DB_USER=${ENV_MB_DB_USER}
-      - ENV_MB_DB_PASS=${ENV_MB_DB_PASS}
-      - ENV_MB_DB_HOST=metabase-postgres-${ENVIRONMENT}
-      - ENV_MB_ENCRYPTION_SECRET_KEY=${ENV_MB_ENCRYPTION_SECRET_KEY}
+      - MB_DB_TYPE=postgres
+      - MB_DB_DBNAME=${ENV_MB_DB_DBNAME}
+      - MB_DB_PORT=${ENV_MB_DB_PORT}
+      - MB_DB_USER=${ENV_MB_DB_USER}
+      - MB_DB_PASS=${ENV_MB_DB_PASS}
+      - MB_DB_HOST=metabase-postgres-${ENVIRONMENT}
+      - MB_ENCRYPTION_SECRET_KEY=${ENV_MB_ENCRYPTION_SECRET_KEY}
    networks:
      - metabase-net
      - proxy-net
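Context for the environment-variable rename in both Metabase files: Metabase only reads configuration from MB_-prefixed environment variables (MB_DB_TYPE, MB_DB_HOST, and so on), so the previous ENV_MB_* names were never picked up inside the container. The rename keeps the host-side ${ENV_MB_*} substitutions but exposes them under the names Metabase actually honours.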
