diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 75ceb48d785040..a0a7b82331be65 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -33,9 +33,9 @@ /airflow/ui/ @bbovenzi @pierrejeambrun @ryanahamilton @jscheffl # Security/Permissions -/airflow/api_connexion/security.py @jhtimmins -/airflow/security/permissions.py @jhtimmins -/airflow/www/security.py @jhtimmins +/airflow/api_connexion/security.py @vincbeck +/airflow/security/permissions.py @vincbeck +/airflow/www/security.py @vincbeck # Calendar/Timetables /airflow/timetables/ @uranusjr diff --git a/.gitignore b/.gitignore index ef8f986f780309..257331cb4e90b3 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ airflow.db airflow/git_version airflow/www/static/coverage/ airflow/www/*.log +airflow/ui/coverage/ logs/ airflow-webserver.pid standalone_admin_password.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4ca7f304d1e6d8..942b34ca2e6d58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1174,7 +1174,7 @@ repos: description: TS types generation / ESLint / Prettier new UI files language: node types_or: [javascript, ts, tsx, yaml, css, json] - files: ^airflow/ui/|^airflow/api_connexion/openapi/v1\.yaml$ + files: ^airflow/ui/|^airflow/api_fastapi/openapi/v1-generated\.yaml$ entry: ./scripts/ci/pre_commit/lint_ui.py additional_dependencies: ['pnpm@9.7.1'] pass_filenames: false diff --git a/Dockerfile b/Dockerfile index 5cd1caec434ee5..68f1ed166f12af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,7 +50,7 @@ ARG AIRFLOW_VERSION="2.10.1" ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_UV_VERSION=0.4.7 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" diff --git a/Dockerfile.ci b/Dockerfile.ci index 9d9de62dd1a4b6..ad944d151adcb2 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1262,7 +1262,7 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" ARG AIRFLOW_CI_BUILD_EPOCH="10" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_UV_VERSION=0.4.7 ARG AIRFLOW_USE_UV="true" # Setup PIP # By default PIP install run without cache to make image smaller @@ -1286,7 +1286,7 @@ ARG AIRFLOW_VERSION="" ARG ADDITIONAL_PIP_INSTALL_FLAGS="" ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.1 +ARG AIRFLOW_UV_VERSION=0.4.7 ARG AIRFLOW_USE_UV="true" ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ diff --git a/airflow/api_connexion/endpoints/dataset_endpoint.py b/airflow/api_connexion/endpoints/dataset_endpoint.py index bfdb8d0a5e7ee2..1a1578266838c6 100644 --- a/airflow/api_connexion/endpoints/dataset_endpoint.py +++ b/airflow/api_connexion/endpoints/dataset_endpoint.py @@ -352,5 +352,6 @@ def create_dataset_event(session: Session = NEW_SESSION) -> APIResponse: ) if not dataset_event: raise NotFound(title="Dataset not found", detail=f"Dataset with uri: '{uri}' not found") + session.flush() # So we can dump the timestamp. 
event = dataset_event_schema.dump(dataset_event) return event diff --git a/airflow/api_connexion/endpoints/xcom_endpoint.py b/airflow/api_connexion/endpoints/xcom_endpoint.py index 59fa9f5acaaa52..5ba0ffa71594df 100644 --- a/airflow/api_connexion/endpoints/xcom_endpoint.py +++ b/airflow/api_connexion/endpoints/xcom_endpoint.py @@ -125,7 +125,7 @@ def get_xcom_entry( stub.value = XCom.deserialize_value(stub) item = stub - if stringify: + if stringify or conf.getboolean("core", "enable_xcom_pickling"): return xcom_schema_string.dump(item) return xcom_schema_native.dump(item) diff --git a/airflow/api_connexion/exceptions.py b/airflow/api_connexion/exceptions.py index 75d9261ef6d444..ef2e2ab9b4bbc0 100644 --- a/airflow/api_connexion/exceptions.py +++ b/airflow/api_connexion/exceptions.py @@ -153,8 +153,8 @@ def __init__( ) -class AlreadyExists(ProblemException): - """Raise when the object already exists.""" +class Conflict(ProblemException): + """Raise when there is some conflict.""" def __init__( self, @@ -173,6 +173,10 @@ def __init__( ) +class AlreadyExists(Conflict): + """Raise when the object already exists.""" + + class Unknown(ProblemException): """Returns a response body and status code for HTTP 500 exception.""" diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index 07cb7fcb747a6d..0c4b0414775f19 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -2040,6 +2040,8 @@ paths: If set to true (default) the Any value will be returned as string, e.g. a Python representation of a dict. If set to false it will return the raw data as dict, list, string or whatever was stored. + This parameter is not meaningful when XCom pickling is enabled; in that case the value is always returned as a string.
+ *New in version 2.10.0* responses: "200": diff --git a/airflow/api_connexion/security.py b/airflow/api_connexion/security.py index 7b0a026e095d0d..7da83a76168bb0 100644 --- a/airflow/api_connexion/security.py +++ b/airflow/api_connexion/security.py @@ -126,13 +126,12 @@ def callback(): if dag_id or access or access_entity: return access - # No DAG id is provided, the user is not authorized to access all DAGs and authorization is done - # on DAG level - # If method is "GET", return whether the user has read access to any DAGs - # If method is "PUT", return whether the user has edit access to any DAGs - return (method == "GET" and any(get_auth_manager().get_permitted_dag_ids(methods=["GET"]))) or ( - method == "PUT" and any(get_auth_manager().get_permitted_dag_ids(methods=["PUT"])) - ) + # dag_id is not provided, and the user is not authorized to access *all* DAGs + # so we check that the user can access at least *one* dag + # but we leave it to the endpoint function to properly restrict access beyond that + if method not in ("GET", "PUT"): + return False + return any(get_auth_manager().get_permitted_dag_ids(methods=[method])) return callback diff --git a/airflow/api_fastapi/db.py b/airflow/api_fastapi/db.py index 932cd4532530d1..51faee25ed5a05 100644 --- a/airflow/api_fastapi/db.py +++ b/airflow/api_fastapi/db.py @@ -23,6 +23,9 @@ if TYPE_CHECKING: from sqlalchemy.orm import Session + from sqlalchemy.sql import Select + + from airflow.api_fastapi.parameters import BaseParam async def get_session() -> Session: @@ -41,3 +44,11 @@ def your_route(session: Annotated[Session, Depends(get_session)]): """ with create_session() as session: yield session + + +def apply_filters_to_select(base_select: Select, filters: list[BaseParam]) -> Select: + select = base_select + for filter in filters: + select = filter.to_orm(select) + + return select diff --git a/airflow/api_fastapi/openapi/v1-generated.yaml b/airflow/api_fastapi/openapi/v1-generated.yaml index f7549f3b424abe..b0037b372bd4e9 100644 --- a/airflow/api_fastapi/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/openapi/v1-generated.yaml @@ -60,12 +60,18 @@ paths: in: query required: false schema: - anyOf: - - type: array - items: - type: string - - type: 'null' + type: array + items: + type: string title: Tags + - name: owners + in: query + required: false + schema: + type: array + items: + type: string + title: Owners - name: dag_id_pattern in: query required: false @@ -74,6 +80,14 @@ paths: - type: string - type: 'null' title: Dag Id Pattern + - name: dag_display_name_pattern + in: query + required: false + schema: + anyOf: + - type: string + - type: 'null' + title: Dag Display Name Pattern - name: only_active in: query required: false diff --git a/airflow/api_fastapi/parameters.py b/airflow/api_fastapi/parameters.py new file mode 100644 index 00000000000000..589403cc4e9601 --- /dev/null +++ b/airflow/api_fastapi/parameters.py @@ -0,0 +1,209 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Generic, List, TypeVar, Union + +from fastapi import Depends, HTTPException, Query +from sqlalchemy import case, or_ +from typing_extensions import Annotated, Self + +from airflow.models.dag import DagModel, DagTag + +if TYPE_CHECKING: + from sqlalchemy.sql import ColumnElement, Select + +T = TypeVar("T") + + +class BaseParam(Generic[T], ABC): + """Base class for filters.""" + + def __init__(self) -> None: + self.value: T | None = None + self.attribute: ColumnElement | None = None + + @abstractmethod + def to_orm(self, select: Select) -> Select: + pass + + @abstractmethod + def __call__(self, *args: Any, **kwarg: Any) -> BaseParam: + pass + + def set_value(self, value: T) -> Self: + self.value = value + return self + + +class _LimitFilter(BaseParam[int]): + """Filter on the limit.""" + + def to_orm(self, select: Select) -> Select: + if self.value is None: + return select + + return select.limit(self.value) + + def __call__(self, limit: int = 100) -> _LimitFilter: + return self.set_value(limit) + + +class _OffsetFilter(BaseParam[int]): + """Filter on offset.""" + + def to_orm(self, select: Select) -> Select: + if self.value is None: + return select + return select.offset(self.value) + + def __call__(self, offset: int = 0) -> _OffsetFilter: + return self.set_value(offset) + + +class _PausedFilter(BaseParam[Union[bool, None]]): + """Filter on is_paused.""" + + def to_orm(self, select: Select) -> Select: + if self.value is None: + return select + return select.where(DagModel.is_paused == self.value) + + def __call__(self, paused: bool | None = Query(default=None)) -> _PausedFilter: + return self.set_value(paused) + + +class _OnlyActiveFilter(BaseParam[bool]): + """Filter on is_active.""" + + def to_orm(self, select: Select) -> Select: + if self.value: + return select.where(DagModel.is_active == self.value) + return select + + def __call__(self, only_active: bool = Query(default=True)) -> _OnlyActiveFilter: + return self.set_value(only_active) + + +class _SearchParam(BaseParam[Union[str, None]]): + """Search on attribute.""" + + def __init__(self, attribute: ColumnElement) -> None: + super().__init__() + self.attribute: ColumnElement = attribute + + def to_orm(self, select: Select) -> Select: + if self.value is None: + return select + return select.where(self.attribute.ilike(f"%{self.value}")) + + +class _DagIdPatternSearch(_SearchParam): + """Search on dag_id.""" + + def __init__(self) -> None: + super().__init__(DagModel.dag_id) + + def __call__(self, dag_id_pattern: str | None = Query(default=None)) -> _DagIdPatternSearch: + return self.set_value(dag_id_pattern) + + +class _DagDisplayNamePatternSearch(_SearchParam): + """Search on dag_display_name.""" + + def __init__(self) -> None: + super().__init__(DagModel.dag_display_name) + + def __call__( + self, dag_display_name_pattern: str | None = Query(default=None) + ) -> _DagDisplayNamePatternSearch: + return self.set_value(dag_display_name_pattern) + + +class SortParam(BaseParam[Union[str]]): + """Order result by the 
attribute.""" + + def __init__(self, allowed_attrs: list[str]) -> None: + super().__init__() + self.allowed_attrs = allowed_attrs + + def to_orm(self, select: Select) -> Select: + if self.value is None: + return select + + lstriped_orderby = self.value.lstrip("-") + if self.allowed_attrs and lstriped_orderby not in self.allowed_attrs: + raise HTTPException( + 400, + f"Ordering with '{lstriped_orderby}' is disallowed or " + f"the attribute does not exist on the model", + ) + + column = getattr(DagModel, lstriped_orderby) + + # MySQL does not support `nullslast`, and True/False ordering depends on the + # database implementation + nullscheck = case((column.isnot(None), 0), else_=1) + if self.value[0] == "-": + return select.order_by(nullscheck, column.desc(), DagModel.dag_id) + else: + return select.order_by(nullscheck, column.asc(), DagModel.dag_id) + + def __call__(self, order_by: str = Query(default="dag_id")) -> SortParam: + return self.set_value(order_by) + + +class _TagsFilter(BaseParam[List[str]]): + """Filter on tags.""" + + def to_orm(self, select: Select) -> Select: + if not self.value: + return select + + conditions = [DagModel.tags.any(DagTag.name == tag) for tag in self.value] + return select.where(or_(*conditions)) + + def __call__(self, tags: list[str] = Query(default_factory=list)) -> _TagsFilter: + return self.set_value(tags) + + +class _OwnersFilter(BaseParam[List[str]]): + """Filter on owners.""" + + def to_orm(self, select: Select) -> Select: + if not self.value: + return select + + conditions = [DagModel.owners.ilike(f"%{owner}%") for owner in self.value] + return select.where(or_(*conditions)) + + def __call__(self, owners: list[str] = Query(default_factory=list)) -> _OwnersFilter: + return self.set_value(owners) + + +QueryLimit = Annotated[_LimitFilter, Depends(_LimitFilter())] +QueryOffset = Annotated[_OffsetFilter, Depends(_OffsetFilter())] +QueryPausedFilter = Annotated[_PausedFilter, Depends(_PausedFilter())] +QueryOnlyActiveFilter = Annotated[_OnlyActiveFilter, Depends(_OnlyActiveFilter())] +QueryDagIdPatternSearch = Annotated[_DagIdPatternSearch, Depends(_DagIdPatternSearch())] +QueryDagDisplayNamePatternSearch = Annotated[ + _DagDisplayNamePatternSearch, Depends(_DagDisplayNamePatternSearch()) +] +QueryTagsFilter = Annotated[_TagsFilter, Depends(_TagsFilter())] +QueryOwnersFilter = Annotated[_OwnersFilter, Depends(_OwnersFilter())] diff --git a/airflow/api_fastapi/views/public/dags.py b/airflow/api_fastapi/views/public/dags.py index e8fac138fc75e1..a1957d30739ecf 100644 --- a/airflow/api_fastapi/views/public/dags.py +++ b/airflow/api_fastapi/views/public/dags.py @@ -17,16 +17,25 @@ from __future__ import annotations -from fastapi import APIRouter, Depends, HTTPException, Query -from sqlalchemy import or_, select +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy import select from sqlalchemy.orm import Session from typing_extensions import Annotated -from airflow.api_connexion.parameters import apply_sorting -from airflow.api_fastapi.db import get_session +from airflow.api_fastapi.db import apply_filters_to_select, get_session +from airflow.api_fastapi.parameters import ( + QueryDagDisplayNamePatternSearch, + QueryDagIdPatternSearch, + QueryLimit, + QueryOffset, + QueryOnlyActiveFilter, + QueryOwnersFilter, + QueryPausedFilter, + QueryTagsFilter, + SortParam, +) from airflow.api_fastapi.serializers.dags import DAGCollectionResponse, DAGModelResponse from airflow.models import DagModel -from airflow.models.dag import DagTag from 
airflow.utils.db import get_query_count dags_router = APIRouter(tags=["DAG"]) @@ -35,39 +44,33 @@ @dags_router.get("/dags") async def get_dags( *, - limit: int = 100, - offset: int = 0, - tags: Annotated[list[str] | None, Query()] = None, - dag_id_pattern: str | None = None, - only_active: bool = True, - paused: bool | None = None, - order_by: str = "dag_id", + limit: QueryLimit, + offset: QueryOffset, + tags: QueryTagsFilter, + owners: QueryOwnersFilter, + dag_id_pattern: QueryDagIdPatternSearch, + dag_display_name_pattern: QueryDagDisplayNamePatternSearch, + only_active: QueryOnlyActiveFilter, + paused: QueryPausedFilter, + order_by: Annotated[SortParam, Depends(SortParam(["dag_id", "dag_display_name", "next_dagrun"]))], session: Annotated[Session, Depends(get_session)], ) -> DAGCollectionResponse: """Get all DAGs.""" - allowed_sorting_attrs = ["dag_id"] dags_query = select(DagModel) - if only_active: - dags_query = dags_query.where(DagModel.is_active) - if paused is not None: - if paused: - dags_query = dags_query.where(DagModel.is_paused) - else: - dags_query = dags_query.where(~DagModel.is_paused) - if dag_id_pattern: - dags_query = dags_query.where(DagModel.dag_id.ilike(f"%{dag_id_pattern}%")) + + dags_query = apply_filters_to_select( + dags_query, [only_active, paused, dag_id_pattern, dag_display_name_pattern, tags, owners] + ) # TODO: Re-enable when permissions are handled. # readable_dags = get_auth_manager().get_permitted_dag_ids(user=g.user) # dags_query = dags_query.where(DagModel.dag_id.in_(readable_dags)) - if tags: - cond = [DagModel.tags.any(DagTag.name == tag) for tag in tags] - dags_query = dags_query.where(or_(*cond)) - total_entries = get_query_count(dags_query, session=session) - dags_query = apply_sorting(dags_query, order_by, {}, allowed_sorting_attrs) - dags = session.scalars(dags_query.offset(offset).limit(limit)).all() + + dags_query = apply_filters_to_select(dags_query, [order_by, offset, limit]) + + dags = session.scalars(dags_query).all() try: return DAGCollectionResponse( diff --git a/airflow/api_internal/endpoints/rpc_api_endpoint.py b/airflow/api_internal/endpoints/rpc_api_endpoint.py index e4a5069b29bcc0..8716d9c9cc49d8 100644 --- a/airflow/api_internal/endpoints/rpc_api_endpoint.py +++ b/airflow/api_internal/endpoints/rpc_api_endpoint.py @@ -101,7 +101,6 @@ def initialize_method_map() -> dict[str, Callable]: DagFileProcessor._execute_task_callbacks, DagFileProcessor.execute_callbacks, DagFileProcessor.execute_callbacks_without_dag, - DagFileProcessor.manage_slas, DagFileProcessor.save_dag_to_db, DagFileProcessor.update_import_errors, DagFileProcessor._validate_task_pools_and_update_dag_warnings, diff --git a/airflow/auth/managers/simple/simple_auth_manager.py b/airflow/auth/managers/simple/simple_auth_manager.py index 1d73341719010a..a683aa5472cef8 100644 --- a/airflow/auth/managers/simple/simple_auth_manager.py +++ b/airflow/auth/managers/simple/simple_auth_manager.py @@ -28,6 +28,7 @@ from termcolor import colored from airflow.auth.managers.base_auth_manager import BaseAuthManager, ResourceMethod +from airflow.auth.managers.simple.user import SimpleAuthManagerUser from airflow.auth.managers.simple.views.auth import SimpleAuthManagerAuthenticationViews from hatch_build import AIRFLOW_ROOT_PATH @@ -43,7 +44,6 @@ PoolDetails, VariableDetails, ) - from airflow.auth.managers.simple.user import SimpleAuthManagerUser class SimpleAuthManagerRole(namedtuple("SimpleAuthManagerRole", "name order"), Enum): @@ -113,7 +113,9 @@ def init(self) -> None: 
file.write(json.dumps(self.passwords)) def is_logged_in(self) -> bool: - return "user" in session + return "user" in session or self.appbuilder.get_app.config.get( + "SIMPLE_AUTH_MANAGER_ALL_ADMINS", False + ) def get_url_login(self, **kwargs) -> str: return url_for("SimpleAuthManagerAuthenticationViews.login") @@ -122,7 +124,12 @@ def get_url_logout(self) -> str: return url_for("SimpleAuthManagerAuthenticationViews.logout") def get_user(self) -> SimpleAuthManagerUser | None: - return session["user"] if self.is_logged_in() else None + if not self.is_logged_in(): + return None + if self.appbuilder.get_app.config.get("SIMPLE_AUTH_MANAGER_ALL_ADMINS", False): + return SimpleAuthManagerUser(username="anonymous", role="admin") + else: + return session["user"] def is_authorized_configuration( self, diff --git a/airflow/callbacks/callback_requests.py b/airflow/callbacks/callback_requests.py index 7158c45d44d911..07ad648e9630f9 100644 --- a/airflow/callbacks/callback_requests.py +++ b/airflow/callbacks/callback_requests.py @@ -137,23 +137,3 @@ def __init__( self.dag_id = dag_id self.run_id = run_id self.is_failure_callback = is_failure_callback - - -class SlaCallbackRequest(CallbackRequest): - """ - A class with information about the SLA callback to be executed. - - :param full_filepath: File Path to use to run the callback - :param dag_id: DAG ID - :param processor_subdir: Directory used by Dag Processor when parsed the dag. - """ - - def __init__( - self, - full_filepath: str, - dag_id: str, - processor_subdir: str | None, - msg: str | None = None, - ): - super().__init__(full_filepath, processor_subdir=processor_subdir, msg=msg) - self.dag_id = dag_id diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 068b19df079ecc..c9abee3c850655 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -395,13 +395,6 @@ core: type: integer example: ~ default: "30" - check_slas: - description: | - On each dagrun check against defined SLAs - version_added: 1.10.8 - type: string - example: ~ - default: "True" xcom_backend: description: | Path to custom XCom class that will be used to store and resolve operators results @@ -1238,6 +1231,13 @@ metrics: type: string example: ~ default: "False" + otel_service: + description: | + The default service name of traces. + version_added: 2.10.3 + type: string + example: ~ + default: "Airflow" otel_ssl_active: description: | If ``True``, SSL will be enabled. Defaults to ``False``. @@ -2566,6 +2566,16 @@ scheduler: example: ~ default: "True" see_also: ":ref:`Differences between the two cron timetables`" + enable_tracemalloc: + description: | + Whether to enable memory allocation tracing in the scheduler. If enabled, Airflow will start + tracing memory allocation and log the top 10 memory usages at the error level upon receiving the + signal SIGUSR1. + This is an expensive operation and generally should not be used except for debugging purposes. 
+ version_added: 3.0.0 + type: boolean + example: ~ + default: "False" triggerer: description: ~ options: diff --git a/airflow/config_templates/default_webserver_config.py b/airflow/config_templates/default_webserver_config.py index 5ef855ae4ab7d8..dda8a25ad9493f 100644 --- a/airflow/config_templates/default_webserver_config.py +++ b/airflow/config_templates/default_webserver_config.py @@ -147,3 +147,6 @@ "role": "admin", } ] + +# Turn this flag on to disable authentication and allow everyone as admin +SIMPLE_AUTH_MANAGER_ALL_ADMINS = False diff --git a/airflow/dag_processing/collection.py b/airflow/dag_processing/collection.py index 3f75e0b23bbfd5..bcac479d875a3b 100644 --- a/airflow/dag_processing/collection.py +++ b/airflow/dag_processing/collection.py @@ -61,46 +61,28 @@ log = logging.getLogger(__name__) -def collect_orm_dags(dags: dict[str, DAG], *, session: Session) -> dict[str, DagModel]: - """ - Collect DagModel objects from DAG objects. - - An existing DagModel is fetched if there's a matching ID in the database. - Otherwise, a new DagModel is created and added to the session. - """ +def _find_orm_dags(dag_ids: Iterable[str], *, session: Session) -> dict[str, DagModel]: + """Find existing DagModel objects from DAG objects.""" stmt = ( select(DagModel) .options(joinedload(DagModel.tags, innerjoin=False)) - .where(DagModel.dag_id.in_(dags)) + .where(DagModel.dag_id.in_(dag_ids)) .options(joinedload(DagModel.schedule_dataset_references)) .options(joinedload(DagModel.schedule_dataset_alias_references)) .options(joinedload(DagModel.task_outlet_dataset_references)) ) stmt = with_row_locks(stmt, of=DagModel, session=session) - existing_orm_dags = {dm.dag_id: dm for dm in session.scalars(stmt).unique()} + return {dm.dag_id: dm for dm in session.scalars(stmt).unique()} + - for dag_id, dag in dags.items(): - if dag_id in existing_orm_dags: - continue - orm_dag = DagModel(dag_id=dag_id) +def _create_orm_dags(dags: Iterable[DAG], *, session: Session) -> Iterator[DagModel]: + for dag in dags: + orm_dag = DagModel(dag_id=dag.dag_id) if dag.is_paused_upon_creation is not None: orm_dag.is_paused = dag.is_paused_upon_creation - orm_dag.tags = [] - log.info("Creating ORM DAG for %s", dag_id) + log.info("Creating ORM DAG for %s", dag.dag_id) session.add(orm_dag) - existing_orm_dags[dag_id] = orm_dag - - return existing_orm_dags - - -def create_orm_dag(dag: DAG, session: Session) -> DagModel: - orm_dag = DagModel(dag_id=dag.dag_id) - if dag.is_paused_upon_creation is not None: - orm_dag.is_paused = dag.is_paused_upon_creation - orm_dag.tags = [] - log.info("Creating ORM DAG for %s", dag.dag_id) - session.add(orm_dag) - return orm_dag + yield orm_dag def _get_latest_runs_stmt(dag_ids: Collection[str]) -> Select: @@ -158,75 +140,101 @@ def calculate(cls, dags: dict[str, DAG], *, session: Session) -> Self: ) -def update_orm_dags( - source_dags: dict[str, DAG], - target_dags: dict[str, DagModel], - *, - processor_subdir: str | None = None, - session: Session, -) -> None: - """ - Apply DAG attributes to DagModel objects. - - Objects in ``target_dags`` are modified in-place. 
- """ - run_info = _RunInfo.calculate(source_dags, session=session) - - for dag_id, dm in sorted(target_dags.items()): - dag = source_dags[dag_id] - dm.fileloc = dag.fileloc - dm.owners = dag.owner - dm.is_active = True - dm.has_import_errors = False - dm.last_parsed_time = utcnow() - dm.default_view = dag.default_view - dm._dag_display_property_value = dag._dag_display_property_value - dm.description = dag.description - dm.max_active_tasks = dag.max_active_tasks - dm.max_active_runs = dag.max_active_runs - dm.max_consecutive_failed_dag_runs = dag.max_consecutive_failed_dag_runs - dm.has_task_concurrency_limits = any( - t.max_active_tis_per_dag is not None or t.max_active_tis_per_dagrun is not None for t in dag.tasks - ) - dm.timetable_summary = dag.timetable.summary - dm.timetable_description = dag.timetable.description - dm.dataset_expression = dag.timetable.dataset_condition.as_expression() - dm.processor_subdir = processor_subdir - - last_automated_run: DagRun | None = run_info.latest_runs.get(dag.dag_id) - if last_automated_run is None: - last_automated_data_interval = None - else: - last_automated_data_interval = dag.get_run_data_interval(last_automated_run) - if run_info.num_active_runs.get(dag.dag_id, 0) >= dm.max_active_runs: - dm.next_dagrun_create_after = None +def _update_dag_tags(tag_names: set[str], dm: DagModel, *, session: Session) -> None: + orm_tags = {t.name: t for t in dm.tags} + for name, orm_tag in orm_tags.items(): + if name not in tag_names: + session.delete(orm_tag) + dm.tags.extend(DagTag(name=name, dag_id=dm.dag_id) for name in tag_names.difference(orm_tags)) + + +def _update_dag_owner_links(dag_owner_links: dict[str, str], dm: DagModel, *, session: Session) -> None: + orm_dag_owner_attributes = {obj.owner: obj for obj in dm.dag_owner_links} + for owner, obj in orm_dag_owner_attributes.items(): + try: + link = dag_owner_links[owner] + except KeyError: + session.delete(obj) else: - dm.calculate_dagrun_date_fields(dag, last_automated_data_interval) - - if not dag.timetable.dataset_condition: - dm.schedule_dataset_references = [] - dm.schedule_dataset_alias_references = [] - # FIXME: STORE NEW REFERENCES. 
- - dag_tags = set(dag.tags or ()) - for orm_tag in (dm_tags := list(dm.tags or [])): - if orm_tag.name not in dag_tags: - session.delete(orm_tag) - dm.tags.remove(orm_tag) - orm_tag_names = {t.name for t in dm_tags} - for dag_tag in dag_tags: - if dag_tag not in orm_tag_names: - dag_tag_orm = DagTag(name=dag_tag, dag_id=dag.dag_id) - dm.tags.append(dag_tag_orm) - session.add(dag_tag_orm) - - dm_links = dm.dag_owner_links or [] - for dm_link in dm_links: - if dm_link not in dag.owner_links: - session.delete(dm_link) - for owner_name, owner_link in dag.owner_links.items(): - dag_owner_orm = DagOwnerAttributes(dag_id=dag.dag_id, owner=owner_name, link=owner_link) - session.add(dag_owner_orm) + if obj.link != link: + obj.link = link + dm.dag_owner_links.extend( + DagOwnerAttributes(dag_id=dm.dag_id, owner=owner, link=link) + for owner, link in dag_owner_links.items() + if owner not in orm_dag_owner_attributes + ) + + +class DagModelOperation(NamedTuple): + """Collect DAG objects and perform database operations for them.""" + + dags: dict[str, DAG] + + def add_dags(self, *, session: Session) -> dict[str, DagModel]: + orm_dags = _find_orm_dags(self.dags, session=session) + orm_dags.update( + (model.dag_id, model) + for model in _create_orm_dags( + (dag for dag_id, dag in self.dags.items() if dag_id not in orm_dags), + session=session, + ) + ) + return orm_dags + + def update_dags( + self, + orm_dags: dict[str, DagModel], + *, + processor_subdir: str | None = None, + session: Session, + ) -> None: + run_info = _RunInfo.calculate(self.dags, session=session) + + for dag_id, dm in sorted(orm_dags.items()): + dag = self.dags[dag_id] + dm.fileloc = dag.fileloc + dm.owners = dag.owner + dm.is_active = True + dm.has_import_errors = False + dm.last_parsed_time = utcnow() + dm.default_view = dag.default_view + dm._dag_display_property_value = dag._dag_display_property_value + dm.description = dag.description + dm.max_active_tasks = dag.max_active_tasks + dm.max_active_runs = dag.max_active_runs + dm.max_consecutive_failed_dag_runs = dag.max_consecutive_failed_dag_runs + dm.has_task_concurrency_limits = any( + t.max_active_tis_per_dag is not None or t.max_active_tis_per_dagrun is not None + for t in dag.tasks + ) + dm.timetable_summary = dag.timetable.summary + dm.timetable_description = dag.timetable.description + dm.dataset_expression = dag.timetable.dataset_condition.as_expression() + dm.processor_subdir = processor_subdir + + last_automated_run: DagRun | None = run_info.latest_runs.get(dag.dag_id) + if last_automated_run is None: + last_automated_data_interval = None + else: + last_automated_data_interval = dag.get_run_data_interval(last_automated_run) + if run_info.num_active_runs.get(dag.dag_id, 0) >= dm.max_active_runs: + dm.next_dagrun_create_after = None + else: + dm.calculate_dagrun_date_fields(dag, last_automated_data_interval) + + if not dag.timetable.dataset_condition: + dm.schedule_dataset_references = [] + dm.schedule_dataset_alias_references = [] + # FIXME: STORE NEW REFERENCES. + + if dag.tags: + _update_dag_tags(set(dag.tags), dm, session=session) + else: # Optimization: no references at all, just clear everything. + dm.tags = [] + if dag.owner_links: + _update_dag_owner_links(dag.owner_links, dm, session=session) + else: # Optimization: no references at all, just clear everything. 
+ dm.dag_owner_links = [] def _find_all_datasets(dags: Iterable[DAG]) -> Iterator[Dataset]: @@ -291,21 +299,15 @@ def add_datasets(self, *, session: Session) -> dict[str, DatasetModel]: dm.uri: dm for dm in session.scalars(select(DatasetModel).where(DatasetModel.uri.in_(self.datasets))) } - - def _resolve_dataset_addition() -> Iterator[DatasetModel]: - for uri, dataset in self.datasets.items(): - try: - dm = orm_datasets[uri] - except KeyError: - dm = orm_datasets[uri] = DatasetModel.from_public(dataset) - yield dm - else: - # The orphaned flag was bulk-set to True before parsing, so we - # don't need to handle rows in the db without a public entry. - dm.is_orphaned = expression.false() - dm.extra = dataset.extra - - dataset_manager.create_datasets(list(_resolve_dataset_addition()), session=session) + for model in orm_datasets.values(): + model.is_orphaned = expression.false() + orm_datasets.update( + (model.uri, model) + for model in dataset_manager.create_datasets( + [dataset for uri, dataset in self.datasets.items() if uri not in orm_datasets], + session=session, + ) + ) return orm_datasets def add_dataset_aliases(self, *, session: Session) -> dict[str, DatasetAliasModel]: @@ -318,12 +320,13 @@ def add_dataset_aliases(self, *, session: Session) -> dict[str, DatasetAliasMode select(DatasetAliasModel).where(DatasetAliasModel.name.in_(self.dataset_aliases)) ) } - for name, alias in self.dataset_aliases.items(): - try: - da = orm_aliases[name] - except KeyError: - da = orm_aliases[name] = DatasetAliasModel.from_public(alias) - session.add(da) + orm_aliases.update( + (model.name, model) + for model in dataset_manager.create_dataset_aliases( + [alias for name, alias in self.dataset_aliases.items() if name not in orm_aliases], + session=session, + ) + ) return orm_aliases def add_dag_dataset_references( diff --git a/airflow/dag_processing/manager.py b/airflow/dag_processing/manager.py index 6df8060f3a311b..05fb72daee6020 100644 --- a/airflow/dag_processing/manager.py +++ b/airflow/dag_processing/manager.py @@ -42,7 +42,7 @@ import airflow.models from airflow.api_internal.internal_api_call import internal_api_call -from airflow.callbacks.callback_requests import CallbackRequest, SlaCallbackRequest +from airflow.callbacks.callback_requests import CallbackRequest from airflow.configuration import conf from airflow.dag_processing.processor import DagFileProcessorProcess from airflow.models.dag import DagModel @@ -752,40 +752,17 @@ def _fetch_callbacks_with_retries( return callback_queue def _add_callback_to_queue(self, request: CallbackRequest): - # requests are sent by dag processors. SLAs exist per-dag, but can be generated once per SLA-enabled - # task in the dag. If treated like other callbacks, SLAs can cause feedback where a SLA arrives, - # goes to the front of the queue, gets processed, triggers more SLAs from the same DAG, which go to - # the front of the queue, and we never get round to picking stuff off the back of the queue - if isinstance(request, SlaCallbackRequest): - if request in self._callback_to_execute[request.full_filepath]: - self.log.debug("Skipping already queued SlaCallbackRequest") - return - - # not already queued, queue the callback - # do NOT add the file of this SLA to self._file_path_queue. SLAs can arrive so rapidly that - # they keep adding to the file queue and never letting it drain. This in turn prevents us from - # ever rescanning the dags folder for changes to existing dags. 
We simply store the callback, and - # periodically, when self._file_path_queue is drained, we rescan and re-queue all DAG files. - # The SLAs will be picked up then. It means a delay in reacting to the SLAs (as controlled by the - # min_file_process_interval config) but stops SLAs from DoS'ing the queue. - self.log.debug("Queuing SlaCallbackRequest for %s", request.dag_id) - self._callback_to_execute[request.full_filepath].append(request) - Stats.incr("dag_processing.sla_callback_count") - - # Other callbacks have a higher priority over DAG Run scheduling, so those callbacks gazump, even if - # already in the file path queue - else: - self.log.debug("Queuing %s CallbackRequest: %s", type(request).__name__, request) - self._callback_to_execute[request.full_filepath].append(request) - if request.full_filepath in self._file_path_queue: - # Remove file paths matching request.full_filepath from self._file_path_queue - # Since we are already going to use that filepath to run callback, - # there is no need to have same file path again in the queue - self._file_path_queue = deque( - file_path for file_path in self._file_path_queue if file_path != request.full_filepath - ) - self._add_paths_to_queue([request.full_filepath], True) - Stats.incr("dag_processing.other_callback_count") + self.log.debug("Queuing %s CallbackRequest: %s", type(request).__name__, request) + self._callback_to_execute[request.full_filepath].append(request) + if request.full_filepath in self._file_path_queue: + # Remove file paths matching request.full_filepath from self._file_path_queue + # Since we are already going to use that filepath to run callback, + # there is no need to have same file path again in the queue + self._file_path_queue = deque( + file_path for file_path in self._file_path_queue if file_path != request.full_filepath + ) + self._add_paths_to_queue([request.full_filepath], True) + Stats.incr("dag_processing.other_callback_count") def _refresh_requested_filelocs(self) -> None: """Refresh filepaths from dag dir as requested by users via APIs.""" diff --git a/airflow/dag_processing/processor.py b/airflow/dag_processing/processor.py index 0b19d8f2db76ce..f030cb75019e5d 100644 --- a/airflow/dag_processing/processor.py +++ b/airflow/dag_processing/processor.py @@ -25,33 +25,28 @@ import zipfile from contextlib import contextmanager, redirect_stderr, redirect_stdout, suppress from dataclasses import dataclass -from datetime import timedelta -from typing import TYPE_CHECKING, Generator, Iterable, Iterator +from typing import TYPE_CHECKING, Generator, Iterable from setproctitle import setproctitle -from sqlalchemy import delete, event, func, or_, select +from sqlalchemy import delete, event from airflow import settings -from airflow.api_internal.internal_api_call import InternalApiConfig, internal_api_call +from airflow.api_internal.internal_api_call import internal_api_call from airflow.callbacks.callback_requests import ( DagCallbackRequest, - SlaCallbackRequest, TaskCallbackRequest, ) from airflow.configuration import conf -from airflow.exceptions import AirflowException, TaskNotFound +from airflow.exceptions import AirflowException from airflow.listeners.listener import get_listener_manager -from airflow.models import SlaMiss from airflow.models.dag import DAG, DagModel from airflow.models.dagbag import DagBag -from airflow.models.dagrun import DagRun as DR from airflow.models.dagwarning import DagWarning, DagWarningType from airflow.models.errors import ParseImportError from airflow.models.serialized_dag import 
SerializedDagModel -from airflow.models.taskinstance import TaskInstance, TaskInstance as TI, _run_finished_callback +from airflow.models.taskinstance import TaskInstance, _run_finished_callback from airflow.stats import Stats from airflow.utils import timezone -from airflow.utils.email import get_email_address_list, send_email from airflow.utils.file import iter_airflow_imports, might_contain_dag from airflow.utils.log.logging_mixin import LoggingMixin, StreamLogWriter, set_context from airflow.utils.mixins import MultiprocessingStartMethodMixin @@ -440,180 +435,6 @@ def __init__(self, dag_ids: list[str] | None, dag_directory: str, log: logging.L self.dag_warnings: set[tuple[str, str]] = set() self._last_num_of_db_queries = 0 - @classmethod - @internal_api_call - @provide_session - def manage_slas(cls, dag_folder, dag_id: str, session: Session = NEW_SESSION) -> None: - """ - Find all tasks that have SLAs defined, and send alert emails when needed. - - New SLA misses are also recorded in the database. - - We are assuming that the scheduler runs often, so we only check for - tasks that should have succeeded in the past hour. - """ - dagbag = DagFileProcessor._get_dagbag(dag_folder) - dag = dagbag.get_dag(dag_id) - cls.logger().info("Running SLA Checks for %s", dag.dag_id) - if not any(isinstance(ti.sla, timedelta) for ti in dag.tasks): - cls.logger().info("Skipping SLA check for %s because no tasks in DAG have SLAs", dag) - return - qry = ( - select(TI.task_id, func.max(DR.execution_date).label("max_ti")) - .join(TI.dag_run) - .where(TI.dag_id == dag.dag_id) - .where(or_(TI.state == TaskInstanceState.SUCCESS, TI.state == TaskInstanceState.SKIPPED)) - .where(TI.task_id.in_(dag.task_ids)) - .group_by(TI.task_id) - .subquery("sq") - ) - # get recorded SlaMiss - recorded_slas_query = set( - session.execute( - select(SlaMiss.dag_id, SlaMiss.task_id, SlaMiss.execution_date).where( - SlaMiss.dag_id == dag.dag_id, SlaMiss.task_id.in_(dag.task_ids) - ) - ) - ) - max_tis: Iterator[TI] = session.scalars( - select(TI) - .join(TI.dag_run) - .where(TI.dag_id == dag.dag_id, TI.task_id == qry.c.task_id, DR.execution_date == qry.c.max_ti) - ) - - ts = timezone.utcnow() - - for ti in max_tis: - task = dag.get_task(ti.task_id) - if not task.sla: - continue - - if not isinstance(task.sla, timedelta): - raise TypeError( - f"SLA is expected to be timedelta object, got " - f"{type(task.sla)} in {task.dag_id}:{task.task_id}" - ) - - sla_misses = [] - next_info = dag.next_dagrun_info(dag.get_run_data_interval(ti.dag_run), restricted=False) - while next_info and next_info.logical_date < ts: - next_info = dag.next_dagrun_info(next_info.data_interval, restricted=False) - - if next_info is None: - break - if (ti.dag_id, ti.task_id, next_info.logical_date) in recorded_slas_query: - continue - if next_info.logical_date + task.sla < ts: - sla_miss = SlaMiss( - task_id=ti.task_id, - dag_id=ti.dag_id, - execution_date=next_info.logical_date, - timestamp=ts, - ) - sla_misses.append(sla_miss) - Stats.incr("sla_missed", tags={"dag_id": ti.dag_id, "task_id": ti.task_id}) - if sla_misses: - session.add_all(sla_misses) - session.commit() - slas: list[SlaMiss] = session.scalars( - select(SlaMiss).where(~SlaMiss.notification_sent, SlaMiss.dag_id == dag.dag_id) - ).all() - if slas: - sla_dates: list[datetime] = [sla.execution_date for sla in slas] - fetched_tis: list[TI] = session.scalars( - select(TI).where( - TI.dag_id == dag.dag_id, - TI.execution_date.in_(sla_dates), - TI.state != TaskInstanceState.SUCCESS, - ) - ).all() - 
blocking_tis: list[TI] = [] - for ti in fetched_tis: - if ti.task_id in dag.task_ids: - ti.task = dag.get_task(ti.task_id) - blocking_tis.append(ti) - else: - session.delete(ti) - session.commit() - - task_list = "\n".join(sla.task_id + " on " + sla.execution_date.isoformat() for sla in slas) - blocking_task_list = "\n".join( - ti.task_id + " on " + ti.execution_date.isoformat() for ti in blocking_tis - ) - # Track whether email or any alert notification sent - # We consider email or the alert callback as notifications - email_sent = False - notification_sent = False - if dag.sla_miss_callback: - # Execute the alert callback - callbacks = ( - dag.sla_miss_callback - if isinstance(dag.sla_miss_callback, list) - else [dag.sla_miss_callback] - ) - for callback in callbacks: - cls.logger().info("Calling SLA miss callback %s", callback) - try: - callback(dag, task_list, blocking_task_list, slas, blocking_tis) - notification_sent = True - except Exception: - Stats.incr( - "sla_callback_notification_failure", - tags={ - "dag_id": dag.dag_id, - "func_name": callback.__name__, - }, - ) - cls.logger().exception( - "Could not call sla_miss_callback(%s) for DAG %s", - callback.__name__, - dag.dag_id, - ) - email_content = f"""\ - Here's a list of tasks that missed their SLAs: -
{task_list}\n
- Blocking tasks: -
{blocking_task_list}
- Airflow Webserver URL: {conf.get(section='webserver', key='base_url')} - """ - - tasks_missed_sla = [] - for sla in slas: - try: - task = dag.get_task(sla.task_id) - except TaskNotFound: - # task already deleted from DAG, skip it - cls.logger().warning( - "Task %s doesn't exist in DAG anymore, skipping SLA miss notification.", sla.task_id - ) - else: - tasks_missed_sla.append(task) - - emails: set[str] = set() - for task in tasks_missed_sla: - if task.email: - if isinstance(task.email, str): - emails.update(get_email_address_list(task.email)) - elif isinstance(task.email, (list, tuple)): - emails.update(task.email) - if emails: - try: - send_email(emails, f"[airflow] SLA miss on DAG={dag.dag_id}", email_content) - email_sent = True - notification_sent = True - except Exception: - Stats.incr("sla_email_notification_failure", tags={"dag_id": dag.dag_id}) - cls.logger().exception( - "Could not send SLA Miss email notification for DAG %s", dag.dag_id - ) - # If we sent any notification, update the sla_miss table - if notification_sent: - for sla in slas: - sla.email_sent = email_sent - sla.notification_sent = True - session.merge(sla) - session.commit() - @staticmethod @internal_api_call @provide_session @@ -748,13 +569,6 @@ def execute_callbacks( try: if isinstance(request, TaskCallbackRequest): cls._execute_task_callbacks(dagbag, request, unit_test_mode, session=session) - elif isinstance(request, SlaCallbackRequest): - if InternalApiConfig.get_use_internal_api(): - cls.logger().warning( - "SlaCallbacks are not supported when the Internal API is enabled" - ) - else: - DagFileProcessor.manage_slas(dagbag.dag_folder, request.dag_id, session=session) elif isinstance(request, DagCallbackRequest): cls._execute_dag_callbacks(dagbag, request, session=session) except Exception: diff --git a/airflow/datasets/manager.py b/airflow/datasets/manager.py index 19f6913fffbeb8..6322414bb8499e 100644 --- a/airflow/datasets/manager.py +++ b/airflow/datasets/manager.py @@ -17,7 +17,7 @@ # under the License. from __future__ import annotations -from collections.abc import Iterable +from collections.abc import Collection, Iterable from typing import TYPE_CHECKING from sqlalchemy import exc, select @@ -25,7 +25,6 @@ from airflow.api_internal.internal_api_call import internal_api_call from airflow.configuration import conf -from airflow.datasets import Dataset from airflow.listeners.listener import get_listener_manager from airflow.models.dagbag import DagPriorityParsingRequest from airflow.models.dataset import ( @@ -38,11 +37,11 @@ ) from airflow.stats import Stats from airflow.utils.log.logging_mixin import LoggingMixin -from airflow.utils.session import NEW_SESSION, provide_session if TYPE_CHECKING: from sqlalchemy.orm.session import Session + from airflow.datasets import Dataset, DatasetAlias from airflow.models.dag import DagModel from airflow.models.taskinstance import TaskInstance @@ -55,27 +54,65 @@ class DatasetManager(LoggingMixin): Airflow deployments can use plugins that broadcast dataset events to each other. 
""" - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def create_datasets(self, dataset_models: list[DatasetModel], session: Session) -> None: + @classmethod + def create_datasets(cls, datasets: list[Dataset], *, session: Session) -> list[DatasetModel]: """Create new datasets.""" - for dataset_model in dataset_models: - session.add(dataset_model) - for dataset_model in dataset_models: - self.notify_dataset_created(dataset=Dataset(uri=dataset_model.uri, extra=dataset_model.extra)) + + def _add_one(dataset: Dataset) -> DatasetModel: + model = DatasetModel.from_public(dataset) + session.add(model) + cls.notify_dataset_created(dataset=dataset) + return model + + return [_add_one(d) for d in datasets] + + @classmethod + def create_dataset_aliases( + cls, + dataset_aliases: list[DatasetAlias], + *, + session: Session, + ) -> list[DatasetAliasModel]: + """Create new dataset aliases.""" + + def _add_one(dataset_alias: DatasetAlias) -> DatasetAliasModel: + model = DatasetAliasModel.from_public(dataset_alias) + session.add(model) + cls.notify_dataset_alias_created(dataset_alias=dataset_alias) + return model + + return [_add_one(a) for a in dataset_aliases] + + @classmethod + def _add_dataset_alias_association( + cls, + alias_names: Collection[str], + dataset: DatasetModel, + *, + session: Session, + ) -> None: + already_related = {m.name for m in dataset.aliases} + existing_aliases = { + m.name: m + for m in session.scalars(select(DatasetAliasModel).where(DatasetAliasModel.name.in_(alias_names))) + } + dataset.aliases.extend( + existing_aliases.get(name, DatasetAliasModel(name=name)) + for name in alias_names + if name not in already_related + ) @classmethod @internal_api_call - @provide_session def register_dataset_change( cls, *, task_instance: TaskInstance | None = None, dataset: Dataset, extra=None, - session: Session = NEW_SESSION, + aliases: Collection[DatasetAlias] = (), source_alias_names: Iterable[str] | None = None, + session: Session, **kwargs, ) -> DatasetEvent | None: """ @@ -88,28 +125,32 @@ def register_dataset_change( dataset_model = session.scalar( select(DatasetModel) .where(DatasetModel.uri == dataset.uri) - .options(joinedload(DatasetModel.consuming_dags).joinedload(DagScheduleDatasetReference.dag)) + .options( + joinedload(DatasetModel.aliases), + joinedload(DatasetModel.consuming_dags).joinedload(DagScheduleDatasetReference.dag), + ) ) if not dataset_model: cls.logger().warning("DatasetModel %s not found", dataset) return None + cls._add_dataset_alias_association({alias.name for alias in aliases}, dataset_model, session=session) + event_kwargs = { "dataset_id": dataset_model.id, "extra": extra, } if task_instance: event_kwargs.update( - { - "source_task_id": task_instance.task_id, - "source_dag_id": task_instance.dag_id, - "source_run_id": task_instance.run_id, - "source_map_index": task_instance.map_index, - } + source_task_id=task_instance.task_id, + source_dag_id=task_instance.dag_id, + source_run_id=task_instance.run_id, + source_map_index=task_instance.map_index, ) dataset_event = DatasetEvent(**event_kwargs) session.add(dataset_event) + session.flush() # Ensure the event is written earlier than DDRQ entries below. 
dags_to_queue_from_dataset = { ref.dag for ref in dataset_model.consuming_dags if ref.dag.is_active and not ref.dag.is_paused @@ -140,7 +181,6 @@ def register_dataset_change( if dags_to_reparse: file_locs = {dag.fileloc for dag in dags_to_reparse} cls._send_dag_priority_parsing_request(file_locs, session) - session.flush() cls.notify_dataset_changed(dataset=dataset) @@ -148,15 +188,20 @@ def register_dataset_change( dags_to_queue = dags_to_queue_from_dataset | dags_to_queue_from_dataset_alias cls._queue_dagruns(dataset_id=dataset_model.id, dags_to_queue=dags_to_queue, session=session) - session.flush() return dataset_event - def notify_dataset_created(self, dataset: Dataset): + @staticmethod + def notify_dataset_created(dataset: Dataset): """Run applicable notification actions when a dataset is created.""" get_listener_manager().hook.on_dataset_created(dataset=dataset) - @classmethod - def notify_dataset_changed(cls, dataset: Dataset): + @staticmethod + def notify_dataset_alias_created(dataset_alias: DatasetAlias): + """Run applicable notification actions when a dataset alias is created.""" + get_listener_manager().hook.on_dataset_alias_created(dataset_alias=dataset_alias) + + @staticmethod + def notify_dataset_changed(dataset: Dataset): """Run applicable notification actions when a dataset is changed.""" get_listener_manager().hook.on_dataset_changed(dataset=dataset) diff --git a/airflow/example_dags/example_sla_dag.py b/airflow/example_dags/example_sla_dag.py deleted file mode 100644 index aca1277e88799a..00000000000000 --- a/airflow/example_dags/example_sla_dag.py +++ /dev/null @@ -1,66 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Example DAG demonstrating SLA use in Tasks""" - -from __future__ import annotations - -import datetime -import time - -import pendulum - -from airflow.decorators import dag, task - - -# [START howto_task_sla] -def sla_callback(dag, task_list, blocking_task_list, slas, blocking_tis): - print( - "The callback arguments are: ", - { - "dag": dag, - "task_list": task_list, - "blocking_task_list": blocking_task_list, - "slas": slas, - "blocking_tis": blocking_tis, - }, - ) - - -@dag( - schedule="*/2 * * * *", - start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), - catchup=False, - sla_miss_callback=sla_callback, - default_args={"email": "email@example.com"}, -) -def example_sla_dag(): - @task(sla=datetime.timedelta(seconds=10)) - def sleep_20(): - """Sleep for 20 seconds""" - time.sleep(20) - - @task - def sleep_30(): - """Sleep for 30 seconds""" - time.sleep(30) - - sleep_20() >> sleep_30() - - -example_dag = example_sla_dag() - -# [END howto_task_sla] diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index ad7690b3f6e1a0..87f496fb054063 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -537,11 +537,11 @@ def get_task_log(self, ti: TaskInstance, try_number: int) -> tuple[list[str], li def end(self) -> None: # pragma: no cover """Wait synchronously for the previously submitted job to complete.""" - raise NotImplementedError() + raise NotImplementedError def terminate(self): """Get called when the daemon receives a SIGTERM.""" - raise NotImplementedError() + raise NotImplementedError def cleanup_stuck_queued_tasks(self, tis: list[TaskInstance]) -> list[str]: # pragma: no cover """ diff --git a/airflow/jobs/local_task_job_runner.py b/airflow/jobs/local_task_job_runner.py index 95a471f239a667..cdc3c1b624694e 100644 --- a/airflow/jobs/local_task_job_runner.py +++ b/airflow/jobs/local_task_job_runner.py @@ -261,8 +261,8 @@ def handle_task_exit(self, return_code: int) -> None: _set_task_deferred_context_var() else: message = f"Task exited with return code {return_code}" - if return_code == -signal.SIGKILL: - message += "For more information, see https://airflow.apache.org/docs/apache-airflow/stable/troubleshooting.html#LocalTaskJob-killed" + if not IS_WINDOWS and return_code == -signal.SIGKILL: + message += ". 
For more information, see https://airflow.apache.org/docs/apache-airflow/stable/troubleshooting.html#LocalTaskJob-killed" self.log.info(message) if not (self.task_instance.test_mode or is_deferral): diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 6b4a73035801b6..9438edd4d91870 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -36,7 +36,7 @@ from sqlalchemy.sql import expression from airflow import settings -from airflow.callbacks.callback_requests import DagCallbackRequest, SlaCallbackRequest, TaskCallbackRequest +from airflow.callbacks.callback_requests import DagCallbackRequest, TaskCallbackRequest from airflow.callbacks.pipe_callback_sink import PipeCallbackSink from airflow.configuration import conf from airflow.exceptions import UnknownExecutorException @@ -181,6 +181,12 @@ def __init__( self.do_pickle = do_pickle + self._enable_tracemalloc = conf.getboolean("scheduler", "enable_tracemalloc") + if self._enable_tracemalloc: + import tracemalloc + + tracemalloc.start() + if log: self._log = log @@ -202,17 +208,37 @@ def register_signals(self) -> None: signal.signal(signal.SIGTERM, self._exit_gracefully) signal.signal(signal.SIGUSR2, self._debug_dump) + if self._enable_tracemalloc: + signal.signal(signal.SIGUSR1, self._log_memory_usage) + def _exit_gracefully(self, signum: int, frame: FrameType | None) -> None: """Clean up processor_agent to avoid leaving orphan processes.""" if not _is_parent_process(): # Only the parent process should perform the cleanup. return + if self._enable_tracemalloc: + import tracemalloc + + tracemalloc.stop() + self.log.info("Exiting gracefully upon receiving signal %s", signum) if self.processor_agent: self.processor_agent.end() sys.exit(os.EX_OK) + def _log_memory_usage(self, signum: int, frame: FrameType | None) -> None: + import tracemalloc + + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics("lineno") + n = 10 + self.log.error( + "scheduler memory usage:\n Top %d\n %s", + n, + "\n\t".join(map(str, top_stats[:n])), + ) + def _debug_dump(self, signum: int, frame: FrameType | None) -> None: if not _is_parent_process(): # Only the parent process should perform the debug dump.
@@ -1192,9 +1218,10 @@ def _do_scheduling(self, session: Session) -> int: self._start_queued_dagruns(session) guard.commit() - dag_runs = self._get_next_dagruns_to_examine(DagRunState.RUNNING, session) + # Bulk fetch the currently active dag runs for the dags we are # examining, rather than making one query per DagRun + dag_runs = DagRun.get_running_dag_runs_to_examine(session=session) callback_tuples = self._schedule_all_dag_runs(guard, dag_runs, session) @@ -1248,11 +1275,6 @@ def _do_scheduling(self, session: Session) -> int: return num_queued_tis - @retry_db_transaction - def _get_next_dagruns_to_examine(self, state: DagRunState, session: Session) -> Query: - """Get Next DagRuns to Examine with retries.""" - return DagRun.next_dagruns_to_examine(state, session) - @retry_db_transaction def _create_dagruns_for_dags(self, guard: CommitProhibitorGuard, session: Session) -> None: """Find Dag Models needing DagRuns and Create Dag Runs with retries in case of OperationalError.""" @@ -1486,7 +1508,7 @@ def _should_update_dag_next_dagruns( def _start_queued_dagruns(self, session: Session) -> None: """Find DagRuns in queued state and decide moving them to running state.""" # added all() to save runtime, otherwise query is executed more than once - dag_runs: Collection[DagRun] = self._get_next_dagruns_to_examine(DagRunState.QUEUED, session).all() + dag_runs: Collection[DagRun] = DagRun.get_queued_dag_runs_to_set_running(session).all() active_runs_of_dags = Counter( DagRun.active_runs_of_dags((dr.dag_id for dr in dag_runs), only_running=True, session=session), @@ -1702,37 +1724,11 @@ def _verify_integrity_if_dag_changed(self, dag_run: DagRun, session: Session) -> return True def _send_dag_callbacks_to_processor(self, dag: DAG, callback: DagCallbackRequest | None = None) -> None: - self._send_sla_callbacks_to_processor(dag) if callback: self.job.executor.send_callback(callback) else: self.log.debug("callback is empty") - def _send_sla_callbacks_to_processor(self, dag: DAG) -> None: - """Send SLA Callbacks to DagFileProcessor if tasks have SLAs set and check_slas=True.""" - if not settings.CHECK_SLAS: - return - - if not any(isinstance(task.sla, timedelta) for task in dag.tasks): - self.log.debug("Skipping SLA check for %s because no tasks in DAG have SLAs", dag) - return - - if not dag.timetable.periodic: - self.log.debug("Skipping SLA check for %s because DAG is not scheduled", dag) - return - - dag_model = DagModel.get_dagmodel(dag.dag_id) - if not dag_model: - self.log.error("Couldn't find DAG %s in database!", dag.dag_id) - return - - request = SlaCallbackRequest( - full_filepath=dag.fileloc, - dag_id=dag.dag_id, - processor_subdir=dag_model.processor_subdir, - ) - self.job.executor.send_callback(request) - @provide_session def _fail_tasks_stuck_in_queued(self, session: Session = NEW_SESSION) -> None: """ diff --git a/airflow/listeners/spec/dataset.py b/airflow/listeners/spec/dataset.py index 214ddad3ffb13b..eee1a10dd7d897 100644 --- a/airflow/listeners/spec/dataset.py +++ b/airflow/listeners/spec/dataset.py @@ -22,7 +22,7 @@ from pluggy import HookspecMarker if TYPE_CHECKING: - from airflow.datasets import Dataset + from airflow.datasets import Dataset, DatasetAlias hookspec = HookspecMarker("airflow") @@ -34,6 +34,13 @@ def on_dataset_created( """Execute when a new dataset is created.""" +@hookspec +def on_dataset_alias_created( + dataset_alias: DatasetAlias, +): + """Execute when a new dataset alias is created.""" + + @hookspec def on_dataset_changed( dataset: Dataset, diff --git 
a/airflow/metrics/otel_logger.py b/airflow/metrics/otel_logger.py index 594046703eb18d..14080eb2d8313c 100644 --- a/airflow/metrics/otel_logger.py +++ b/airflow/metrics/otel_logger.py @@ -408,8 +408,9 @@ def get_otel_logger(cls) -> SafeOtelLogger: # PeriodicExportingMetricReader will default to an interval of 60000 millis. interval = conf.getint("metrics", "otel_interval_milliseconds", fallback=None) # ex: 30000 debug = conf.getboolean("metrics", "otel_debugging_on") + service_name = conf.get("metrics", "otel_service") - resource = Resource(attributes={SERVICE_NAME: "Airflow"}) + resource = Resource(attributes={SERVICE_NAME: service_name}) protocol = "https" if ssl_active else "http" endpoint = f"{protocol}://{host}:{port}/v1/metrics" diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 8f95d1eee73029..20656586ba01ef 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -677,17 +677,7 @@ class derived from this one results in the creation of a task object, way to limit concurrency for certain tasks :param pool_slots: the number of pool slots this task should use (>= 1) Values less than 1 are not allowed. - :param sla: time by which the job is expected to succeed. Note that - this represents the ``timedelta`` after the period is closed. For - example if you set an SLA of 1 hour, the scheduler would send an email - soon after 1:00AM on the ``2016-01-02`` if the ``2016-01-01`` instance - has not succeeded yet. - The scheduler pays special attention for jobs with an SLA and - sends alert - emails for SLA misses. SLA misses are also recorded in the database - for future reference. All tasks that share the same SLA time - get bundled in a single email, sent soon after that time. SLA - notification are sent once and only once for each task instance. + :param sla: DEPRECATED - The SLA feature is removed in Airflow 3.0, to be replaced with a new implementation in 3.1 :param execution_timeout: max time allowed for the execution of this task instance, if it goes beyond it will raise and fail. 
:param on_failure_callback: a function or list of functions to be called when a task instance @@ -975,7 +965,11 @@ def __init__( if self.pool_slots < 1: dag_str = f" in dag {dag.dag_id}" if dag else "" raise ValueError(f"pool slots for {self.task_id}{dag_str} cannot be less than 1") - self.sla = sla + + if sla: + self.log.warning( + "The SLA feature is removed in Airflow 3.0, to be replaced with a new implementation in 3.1" + ) if not TriggerRule.is_valid(trigger_rule): raise AirflowException( diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 00820585b68a93..91f8aec7302cb0 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -28,6 +28,7 @@ import sys import time import traceback +import warnings import weakref from collections import abc, defaultdict, deque from contextlib import ExitStack @@ -41,7 +42,6 @@ Container, Iterable, Iterator, - List, MutableSet, Pattern, Sequence, @@ -88,6 +88,7 @@ DuplicateTaskIdFound, FailStopDagInvalidTriggerRule, ParamValidationError, + RemovedInAirflow3Warning, TaskDeferred, TaskNotFound, UnknownExecutorException, @@ -144,7 +145,6 @@ from airflow.decorators import TaskDecoratorCollection from airflow.models.dagbag import DagBag from airflow.models.operator import Operator - from airflow.models.slamiss import SlaMiss from airflow.serialization.pydantic.dag import DagModelPydantic from airflow.serialization.pydantic.dag_run import DagRunPydantic from airflow.typing_compat import Literal @@ -167,8 +167,6 @@ Collection[Union["Dataset", "DatasetAlias"]], ] -SLAMissCallback = Callable[["DAG", str, str, List["SlaMiss"], List[TaskInstance]], None] - class InconsistentDataInterval(AirflowException): """ @@ -428,10 +426,7 @@ class DAG(LoggingMixin): beyond this the scheduler will disable the DAG :param dagrun_timeout: Specify the duration a DagRun should be allowed to run before it times out or fails. Task instances that are running when a DagRun is timed out will be marked as skipped. - :param sla_miss_callback: specify a function or list of functions to call when reporting SLA - timeouts. See :ref:`sla_miss_callback` for - more information about the function signature and parameters that are - passed to the callback. 
+ :param sla_miss_callback: DEPRECATED - The SLA feature is removed in Airflow 3.0, to be replaced with a new implementation in 3.1 :param default_view: Specify DAG default view (grid, graph, duration, gantt, landing_times), default grid :param orientation: Specify DAG orientation in graph view (LR, TB, RL, BT), default LR @@ -517,7 +512,7 @@ def __init__( "core", "max_consecutive_failed_dag_runs_per_dag" ), dagrun_timeout: timedelta | None = None, - sla_miss_callback: None | SLAMissCallback | list[SLAMissCallback] = None, + sla_miss_callback: Any = None, default_view: str = airflow_conf.get_mandatory_value("webserver", "dag_default_view").lower(), orientation: str = airflow_conf.get_mandatory_value("webserver", "dag_orientation"), catchup: bool = airflow_conf.getboolean("scheduler", "catchup_by_default"), @@ -637,7 +632,10 @@ def __init__( f"requires max_active_runs <= {self.timetable.active_runs_limit}" ) self.dagrun_timeout = dagrun_timeout - self.sla_miss_callback = sla_miss_callback + if sla_miss_callback: + log.warning( + "The SLA feature is removed in Airflow 3.0, to be replaced with a new implementation in 3.1" + ) if default_view in DEFAULT_VIEW_PRESETS: self._default_view: str = default_view else: @@ -2331,6 +2329,13 @@ def run( :param run_at_least_once: If true, always run the DAG at least once even if no logical run exists within the time range. """ + warnings.warn( + "`DAG.run()` is deprecated and will be removed in Airflow 3.0. Consider " + "using `DAG.test()` instead, or trigger your dag via API.", + RemovedInAirflow3Warning, + stacklevel=2, + ) + from airflow.executors.executor_loader import ExecutorLoader from airflow.jobs.backfill_job_runner import BackfillJobRunner @@ -2644,28 +2649,16 @@ def bulk_write_to_db( if not dags: return - from airflow.dag_processing.collection import ( - DatasetModelOperation, - collect_orm_dags, - create_orm_dag, - update_orm_dags, - ) + from airflow.dag_processing.collection import DagModelOperation, DatasetModelOperation log.info("Sync %s DAGs", len(dags)) - dags_by_ids = {dag.dag_id: dag for dag in dags} - del dags - - orm_dags = collect_orm_dags(dags_by_ids, session=session) - orm_dags.update( - (dag_id, create_orm_dag(dag, session=session)) - for dag_id, dag in dags_by_ids.items() - if dag_id not in orm_dags - ) + dag_op = DagModelOperation({dag.dag_id: dag for dag in dags}) - update_orm_dags(dags_by_ids, orm_dags, processor_subdir=processor_subdir, session=session) - DagCode.bulk_sync_to_db((dag.fileloc for dag in dags_by_ids.values()), session=session) + orm_dags = dag_op.add_dags(session=session) + dag_op.update_dags(orm_dags, processor_subdir=processor_subdir, session=session) + DagCode.bulk_sync_to_db((dag.fileloc for dag in dags), session=session) - dataset_op = DatasetModelOperation.collect(dags_by_ids) + dataset_op = DatasetModelOperation.collect(dag_op.dags) orm_datasets = dataset_op.add_datasets(session=session) orm_dataset_aliases = dataset_op.add_dataset_aliases(session=session) @@ -3300,7 +3293,7 @@ def dag( "core", "max_consecutive_failed_dag_runs_per_dag" ), dagrun_timeout: timedelta | None = None, - sla_miss_callback: None | SLAMissCallback | list[SLAMissCallback] = None, + sla_miss_callback: Any = None, default_view: str = airflow_conf.get_mandatory_value("webserver", "dag_default_view").lower(), orientation: str = airflow_conf.get_mandatory_value("webserver", "dag_orientation"), catchup: bool = airflow_conf.getboolean("scheduler", "catchup_by_default"), diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py 
index c932958861f7a3..3ef1c18f152a47 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -67,6 +67,7 @@ from airflow.utils.dates import datetime_to_nano from airflow.utils.helpers import chunks, is_container, prune_dict from airflow.utils.log.logging_mixin import LoggingMixin +from airflow.utils.retries import retry_db_transaction from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.sqlalchemy import UtcDateTime, nulls_first, tuple_in_condition, with_row_locks from airflow.utils.state import DagRunState, State, TaskInstanceState @@ -388,12 +389,8 @@ def active_runs_of_dags( return dict(iter(session.execute(query))) @classmethod - def next_dagruns_to_examine( - cls, - state: DagRunState, - session: Session, - max_number: int | None = None, - ) -> Query: + @retry_db_transaction + def get_running_dag_runs_to_examine(cls, session: Session) -> Query: """ Return the next DagRuns that the scheduler should attempt to schedule. @@ -401,42 +398,79 @@ def next_dagruns_to_examine( query, you should ensure that any scheduling decisions are made in a single transaction -- as soon as the transaction is committed it will be unlocked. + :meta private: """ from airflow.models.dag import DagModel - if max_number is None: - max_number = cls.DEFAULT_DAGRUNS_TO_EXAMINE - - # TODO: Bake this query, it is run _A lot_ query = ( select(cls) .with_hint(cls, "USE INDEX (idx_dag_run_running_dags)", dialect_name="mysql") - .where(cls.state == state, cls.run_type != DagRunType.BACKFILL_JOB) + .where(cls.state == DagRunState.RUNNING, cls.run_type != DagRunType.BACKFILL_JOB) .join(DagModel, DagModel.dag_id == cls.dag_id) .where(DagModel.is_paused == false(), DagModel.is_active == true()) + .order_by( + nulls_first(cls.last_scheduling_decision, session=session), + cls.execution_date, + ) ) - if state == DagRunState.QUEUED: - # For dag runs in the queued state, we check if they have reached the max_active_runs limit - # and if so we drop them - running_drs = ( - select(DagRun.dag_id, func.count(DagRun.state).label("num_running")) - .where(DagRun.state == DagRunState.RUNNING) - .group_by(DagRun.dag_id) - .subquery() + + if not settings.ALLOW_FUTURE_EXEC_DATES: + query = query.where(DagRun.execution_date <= func.now()) + + return session.scalars( + with_row_locks( + query.limit(cls.DEFAULT_DAGRUNS_TO_EXAMINE), + of=cls, + session=session, + skip_locked=True, ) - query = query.outerjoin(running_drs, running_drs.c.dag_id == DagRun.dag_id).where( - func.coalesce(running_drs.c.num_running, 0) < DagModel.max_active_runs + ) + + @classmethod + @retry_db_transaction + def get_queued_dag_runs_to_set_running(cls, session: Session) -> Query: + """ + Return the next queued DagRuns that the scheduler should attempt to schedule. + + This will return zero or more DagRun rows that are row-level-locked with a "SELECT ... FOR UPDATE" + query, you should ensure that any scheduling decisions are made in a single transaction -- as soon as + the transaction is committed it will be unlocked. 
+ + :meta private: + """ + from airflow.models.dag import DagModel + + # For dag runs in the queued state, we check if they have reached the max_active_runs limit + # and if so we drop them + running_drs = ( + select(DagRun.dag_id, func.count(DagRun.state).label("num_running")) + .where(DagRun.state == DagRunState.RUNNING) + .group_by(DagRun.dag_id) + .subquery() + ) + query = ( + select(cls) + .where(cls.state == DagRunState.QUEUED, cls.run_type != DagRunType.BACKFILL_JOB) + .join(DagModel, DagModel.dag_id == cls.dag_id) + .where(DagModel.is_paused == false(), DagModel.is_active == true()) + .outerjoin(running_drs, running_drs.c.dag_id == DagRun.dag_id) + .where(func.coalesce(running_drs.c.num_running, 0) < DagModel.max_active_runs) + .order_by( + nulls_first(cls.last_scheduling_decision, session=session), + cls.execution_date, ) - query = query.order_by( - nulls_first(cls.last_scheduling_decision, session=session), - cls.execution_date, ) if not settings.ALLOW_FUTURE_EXEC_DATES: query = query.where(DagRun.execution_date <= func.now()) return session.scalars( - with_row_locks(query.limit(max_number), of=cls, session=session, skip_locked=True) + with_row_locks( + query.limit(cls.DEFAULT_DAGRUNS_TO_EXAMINE), + of=cls, + session=session, + skip_locked=True, + ) ) @classmethod diff --git a/airflow/models/dataset.py b/airflow/models/dataset.py index 5033da48a3059e..489d6b68a6f151 100644 --- a/airflow/models/dataset.py +++ b/airflow/models/dataset.py @@ -138,6 +138,9 @@ def __eq__(self, other): else: return NotImplemented + def to_public(self) -> DatasetAlias: + return DatasetAlias(name=self.name) + class DatasetModel(Base): """ @@ -200,6 +203,9 @@ def __hash__(self): def __repr__(self): return f"{self.__class__.__name__}(uri={self.uri!r}, extra={self.extra!r})" + def to_public(self) -> Dataset: + return Dataset(uri=self.uri, extra=self.extra) + class DagScheduleDatasetAliasReference(Base): """References from a DAG to a dataset alias of which it is a consumer.""" diff --git a/airflow/models/mappedoperator.py b/airflow/models/mappedoperator.py index 2cb7d993fc9f9d..8a9e790ea7fc65 100644 --- a/airflow/models/mappedoperator.py +++ b/airflow/models/mappedoperator.py @@ -26,7 +26,7 @@ import attr import methodtools -from airflow.exceptions import AirflowException, UnmappableOperator +from airflow.exceptions import UnmappableOperator from airflow.models.abstractoperator import ( DEFAULT_EXECUTOR, DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST, @@ -328,11 +328,6 @@ def __attrs_post_init__(self): for k, v in self.partial_kwargs.items(): if k in self.template_fields: XComArg.apply_upstream_relationship(self, v) - if self.partial_kwargs.get("sla") is not None: - raise AirflowException( - f"SLAs are unsupported with mapped tasks. Please set `sla=None` for task " - f"{self.task_id!r}." 
- ) @methodtools.lru_cache(maxsize=None) @classmethod @@ -547,14 +542,6 @@ def weight_rule(self) -> PriorityWeightStrategy: # type: ignore[override] def weight_rule(self, value: str | PriorityWeightStrategy) -> None: self.partial_kwargs["weight_rule"] = validate_and_load_priority_weight_strategy(value) - @property - def sla(self) -> datetime.timedelta | None: - return self.partial_kwargs.get("sla") - - @sla.setter - def sla(self, value: datetime.timedelta | None) -> None: - self.partial_kwargs["sla"] = value - @property def max_active_tis_per_dag(self) -> int | None: return self.partial_kwargs.get("max_active_tis_per_dag") diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 954e5ed4d0c809..c17acdd2b72124 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -89,7 +89,7 @@ from airflow.listeners.listener import get_listener_manager from airflow.models.base import Base, StringID, TaskInstanceDependencies, _sentinel from airflow.models.dagbag import DagBag -from airflow.models.dataset import DatasetAliasModel, DatasetModel +from airflow.models.dataset import DatasetModel from airflow.models.log import Log from airflow.models.param import process_params from airflow.models.renderedtifields import get_serialized_template_fields @@ -2893,7 +2893,7 @@ def _register_dataset_changes(self, *, events: OutletEventAccessors, session: Se # One task only triggers one dataset event for each dataset with the same extra. # This tuple[dataset uri, extra] to sets alias names mapping is used to find whether # there're datasets with same uri but different extra that we need to emit more than one dataset events. - dataset_tuple_to_alias_names_mapping: dict[tuple[str, frozenset], set[str]] = defaultdict(set) + dataset_alias_names: dict[tuple[str, frozenset], set[str]] = defaultdict(set) for obj in self.task.outlets or []: self.log.debug("outlet obj %s", obj) # Lineage can have other types of objects besides datasets @@ -2908,33 +2908,25 @@ def _register_dataset_changes(self, *, events: OutletEventAccessors, session: Se for dataset_alias_event in events[obj].dataset_alias_events: dataset_alias_name = dataset_alias_event["source_alias_name"] dataset_uri = dataset_alias_event["dest_dataset_uri"] - extra = dataset_alias_event["extra"] - frozen_extra = frozenset(extra.items()) + frozen_extra = frozenset(dataset_alias_event["extra"].items()) + dataset_alias_names[(dataset_uri, frozen_extra)].add(dataset_alias_name) - dataset_tuple_to_alias_names_mapping[(dataset_uri, frozen_extra)].add(dataset_alias_name) - - dataset_objs_cache: dict[str, DatasetModel] = {} - for (uri, extra_items), alias_names in dataset_tuple_to_alias_names_mapping.items(): - if uri not in dataset_objs_cache: - dataset_obj = session.scalar(select(DatasetModel).where(DatasetModel.uri == uri).limit(1)) - dataset_objs_cache[uri] = dataset_obj - else: - dataset_obj = dataset_objs_cache[uri] - - if not dataset_obj: - dataset_obj = DatasetModel(uri=uri) - dataset_manager.create_datasets(dataset_models=[dataset_obj], session=session) - self.log.warning("Created a new %r as it did not exist.", dataset_obj) - session.flush() - dataset_objs_cache[uri] = dataset_obj - - for alias in alias_names: - alias_obj = session.scalar( - select(DatasetAliasModel).where(DatasetAliasModel.name == alias).limit(1) - ) - dataset_obj.aliases.append(alias_obj) + dataset_models: dict[str, DatasetModel] = { + dataset_obj.uri: dataset_obj + for dataset_obj in session.scalars( + 
select(DatasetModel).where(DatasetModel.uri.in_(uri for uri, _ in dataset_alias_names)) + ) + } + if missing_datasets := [Dataset(uri=u) for u, _ in dataset_alias_names if u not in dataset_models]: + dataset_models.update( + (dataset_obj.uri, dataset_obj) + for dataset_obj in dataset_manager.create_datasets(missing_datasets, session=session) + ) + self.log.warning("Created new datasets for alias reference: %s", missing_datasets) + session.flush() # Needed because we need the id for fk. - extra = {k: v for k, v in extra_items} + for (uri, extra_items), alias_names in dataset_alias_names.items(): + dataset_obj = dataset_models[uri] self.log.info( 'Creating event for %r through aliases "%s"', dataset_obj, @@ -2942,8 +2934,9 @@ def _register_dataset_changes(self, *, events: OutletEventAccessors, session: Se ) dataset_manager.register_dataset_change( task_instance=self, - dataset=dataset_obj, - extra=extra, + dataset=dataset_obj.to_public(), + aliases=[DatasetAlias(name) for name in alias_names], + extra=dict(extra_items), session=session, source_alias_names=alias_names, ) diff --git a/airflow/providers/amazon/CHANGELOG.rst b/airflow/providers/amazon/CHANGELOG.rst index 126da03ad630f3..7596ad3886c7ad 100644 --- a/airflow/providers/amazon/CHANGELOG.rst +++ b/airflow/providers/amazon/CHANGELOG.rst @@ -26,6 +26,33 @@ Changelog --------- +Main +...... + +Breaking changes +~~~~~~~~~~~~~~~~ + +.. warning:: + In order to support session reuse in RedshiftData operators, the following breaking changes were introduced: + + The ``database`` argument is now optional and as a result was moved after the ``sql`` argument which is a positional + one. Update your DAGs accordingly if they rely on argument order. Applies to: + * ``RedshiftDataHook``'s ``execute_query`` method + * ``RedshiftDataOperator`` + + ``RedshiftDataHook``'s ``execute_query`` method now returns a ``QueryExecutionOutput`` object instead of just the + statement ID as a string. + + ``RedshiftDataHook``'s ``parse_statement_resposne`` method was renamed to ``parse_statement_response``. + + ``S3ToRedshiftOperator``'s ``schema`` argument is now optional and was moved after the ``s3_key`` positional argument. + Update your DAGs accordingly if they rely on argument order. + +Features +~~~~~~~~ + +* ``Support session reuse in RedshiftDataOperator, RedshiftToS3Operator and S3ToRedshiftOperator (#42218)`` + 8.29.0 ...... diff --git a/airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py b/airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py index 62452b722977c2..fcd9bddaceded2 100644 --- a/airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py +++ b/airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py @@ -18,7 +18,6 @@ from __future__ import annotations -import json import logging from pathlib import Path from typing import TYPE_CHECKING @@ -64,10 +63,8 @@ def init_avp(args): _set_schema(client, policy_store_id, args) if not args.dry_run: - print( - "Please set configs below in Airflow configuration under AIRFLOW__AWS_AUTH_MANAGER__." 
- ) - print(json.dumps({"avp_policy_store_id": policy_store_id}, indent=4)) + print("Please set configs below in Airflow configuration:\n") + print(f"AIRFLOW__AWS_AUTH_MANAGER__AVP_POLICY_STORE_ID={policy_store_id}\n") @cli_utils.action_cli diff --git a/airflow/providers/amazon/aws/auth_manager/cli/definition.py b/airflow/providers/amazon/aws/auth_manager/cli/definition.py index bb1236d5c4c944..b5f37136f51d91 100644 --- a/airflow/providers/amazon/aws/auth_manager/cli/definition.py +++ b/airflow/providers/amazon/aws/auth_manager/cli/definition.py @@ -55,12 +55,6 @@ ################ AWS_AUTH_MANAGER_COMMANDS = ( - ActionCommand( - name="init-identity-center", - help="Initialize AWS IAM identity Center resources to be used by AWS manager", - func=lazy_load_command("airflow.providers.amazon.aws.auth_manager.cli.idc_commands.init_idc"), - args=(ARG_INSTANCE_NAME, ARG_APPLICATION_NAME, ARG_DRY_RUN, ARG_VERBOSE), - ), ActionCommand( name="init-avp", help="Initialize Amazon Verified resources to be used by AWS manager", diff --git a/airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py b/airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py deleted file mode 100644 index 388948765ace6c..00000000000000 --- a/airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py +++ /dev/null @@ -1,149 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""User sub-commands.""" - -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING - -import boto3 -from botocore.exceptions import ClientError - -from airflow.configuration import conf -from airflow.exceptions import AirflowOptionalProviderFeatureException -from airflow.providers.amazon.aws.auth_manager.constants import CONF_REGION_NAME_KEY, CONF_SECTION_NAME -from airflow.utils import cli as cli_utils - -try: - from airflow.utils.providers_configuration_loader import providers_configuration_loaded -except ImportError: - raise AirflowOptionalProviderFeatureException( - "Failed to import avp_commands. This feature is only available in Airflow " - "version >= 2.8.0 where Auth Managers are introduced." 
- ) - -if TYPE_CHECKING: - from botocore.client import BaseClient - -log = logging.getLogger(__name__) - - -@cli_utils.action_cli -@providers_configuration_loaded -def init_idc(args): - """Initialize AWS IAM Identity Center resources.""" - client = _get_client() - - # Create the instance if needed - instance_arn = _create_instance(client, args) - - # Create the application if needed - _create_application(client, instance_arn, args) - - if not args.dry_run: - print("AWS IAM Identity Center resources created successfully.") - - -def _get_client(): - """Return AWS IAM Identity Center client.""" - region_name = conf.get(CONF_SECTION_NAME, CONF_REGION_NAME_KEY) - return boto3.client("sso-admin", region_name=region_name) - - -def _create_instance(client: BaseClient, args) -> str | None: - """Create if needed AWS IAM Identity Center instance.""" - instances = client.list_instances() - - if args.verbose: - log.debug("Instances found: %s", instances) - - if len(instances["Instances"]) > 0: - print( - f"There is already an instance configured in AWS IAM Identity Center: '{instances['Instances'][0]['InstanceArn']}'. " - "No need to create a new one." - ) - return instances["Instances"][0]["InstanceArn"] - else: - print("No instance configured in AWS IAM Identity Center, creating one.") - if args.dry_run: - print("Dry run, not creating the instance.") - return None - - response = client.create_instance(Name=args.instance_name) - if args.verbose: - log.debug("Response from create_instance: %s", response) - - print(f"Instance created: '{response['InstanceArn']}'") - - return response["InstanceArn"] - - -def _create_application(client: BaseClient, instance_arn: str | None, args) -> str | None: - """Create if needed AWS IAM identity Center application.""" - paginator = client.get_paginator("list_applications") - pages = paginator.paginate(InstanceArn=instance_arn or "") - applications = [application for page in pages for application in page["Applications"]] - existing_applications = [ - application for application in applications if application["Name"] == args.application_name - ] - - if args.verbose: - log.debug("Applications found: %s", applications) - log.debug("Existing applications found: %s", existing_applications) - - if len(existing_applications) > 0: - print( - f"There is already an application named '{args.application_name}' in AWS IAM Identity Center: '{existing_applications[0]['ApplicationArn']}'. " - "Using this application." - ) - return existing_applications[0]["ApplicationArn"] - else: - print(f"No application named {args.application_name} found, creating one.") - if args.dry_run: - print("Dry run, not creating the application.") - return None - - try: - response = client.create_application( - ApplicationProviderArn="arn:aws:sso::aws:applicationProvider/custom-saml", - Description="Application automatically created through the Airflow CLI. 
This application is used to access Airflow environment.", - InstanceArn=instance_arn, - Name=args.application_name, - PortalOptions={ - "SignInOptions": { - "Origin": "IDENTITY_CENTER", - }, - "Visibility": "ENABLED", - }, - Status="ENABLED", - ) - if args.verbose: - log.debug("Response from create_application: %s", response) - except ClientError as e: - # This is needed because as of today, the create_application in AWS Identity Center does not support SAML application - # Remove this part when it is supported - if "is not supported for this action" in e.response["Error"]["Message"]: - print( - "Creation of SAML applications is only supported in AWS console today. " - "Please create the application through the console." - ) - raise - - print(f"Application created: '{response['ApplicationArn']}'") - - return response["ApplicationArn"] diff --git a/airflow/providers/amazon/aws/auth_manager/views/auth.py b/airflow/providers/amazon/aws/auth_manager/views/auth.py index 7ea602d0dd45d6..e08c2a7a6e1007 100644 --- a/airflow/providers/amazon/aws/auth_manager/views/auth.py +++ b/airflow/providers/amazon/aws/auth_manager/views/auth.py @@ -61,7 +61,7 @@ def login(self): saml_auth = self._init_saml_auth() return redirect(saml_auth.login()) - @expose("/logout") + @expose("/logout", methods=("GET", "POST")) def logout(self): """Start logout process.""" session.clear() diff --git a/airflow/providers/amazon/aws/hooks/redshift_data.py b/airflow/providers/amazon/aws/hooks/redshift_data.py index 3c1f84b1f694c3..b2f46c0ef6049e 100644 --- a/airflow/providers/amazon/aws/hooks/redshift_data.py +++ b/airflow/providers/amazon/aws/hooks/redshift_data.py @@ -18,8 +18,12 @@ from __future__ import annotations import time +from dataclasses import dataclass from pprint import pformat from typing import TYPE_CHECKING, Any, Iterable +from uuid import UUID + +from pendulum import duration from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook from airflow.providers.amazon.aws.utils import trim_none_values @@ -35,6 +39,14 @@ RUNNING_STATES = {"PICKED", "STARTED", "SUBMITTED"} +@dataclass +class QueryExecutionOutput: + """Describes the output of a query execution.""" + + statement_id: str + session_id: str | None + + class RedshiftDataQueryFailedError(ValueError): """Raise an error that redshift data query failed.""" @@ -65,8 +77,8 @@ def __init__(self, *args, **kwargs) -> None: def execute_query( self, - database: str, sql: str | list[str], + database: str | None = None, cluster_identifier: str | None = None, db_user: str | None = None, parameters: Iterable | None = None, @@ -76,23 +88,28 @@ def execute_query( wait_for_completion: bool = True, poll_interval: int = 10, workgroup_name: str | None = None, - ) -> str: + session_id: str | None = None, + session_keep_alive_seconds: int | None = None, + ) -> QueryExecutionOutput: """ Execute a statement against Amazon Redshift. 
- :param database: the name of the database :param sql: the SQL statement or list of SQL statement to run + :param database: the name of the database :param cluster_identifier: unique identifier of a cluster :param db_user: the database username :param parameters: the parameters for the SQL statement :param secret_arn: the name or ARN of the secret that enables db access :param statement_name: the name of the SQL statement - :param with_event: indicates whether to send an event to EventBridge - :param wait_for_completion: indicates whether to wait for a result, if True wait, if False don't wait + :param with_event: whether to send an event to EventBridge + :param wait_for_completion: whether to wait for a result :param poll_interval: how often in seconds to check the query status :param workgroup_name: name of the Redshift Serverless workgroup. Mutually exclusive with `cluster_identifier`. Specify this parameter to query Redshift Serverless. More info https://docs.aws.amazon.com/redshift/latest/mgmt/working-with-serverless.html + :param session_id: the session identifier of the query + :param session_keep_alive_seconds: duration in seconds to keep the session alive after the query + finishes. The maximum time a session can be kept alive is 24 hours - :returns statement_id: str, the UUID of the statement + :returns: a ``QueryExecutionOutput`` containing the statement ID and, if a session was used, the session ID """ @@ -105,7 +122,28 @@ def execute_query( "SecretArn": secret_arn, "StatementName": statement_name, "WorkgroupName": workgroup_name, + "SessionId": session_id, + "SessionKeepAliveSeconds": session_keep_alive_seconds, } + + if sum(x is not None for x in (cluster_identifier, workgroup_name, session_id)) != 1: + raise ValueError( + "Exactly one of cluster_identifier, workgroup_name, or session_id must be provided" + ) + + if session_id is not None: + msg = "session_id must be a valid UUID4" + try: + if UUID(session_id).version != 4: + raise ValueError(msg) + except ValueError: + raise ValueError(msg) + + if session_keep_alive_seconds is not None and ( + session_keep_alive_seconds < 0 or duration(seconds=session_keep_alive_seconds).in_hours() > 24 + ): + raise ValueError("Session keep alive duration must be between 0 and 86400 seconds.") + if isinstance(sql, list): kwargs["Sqls"] = sql resp = self.conn.batch_execute_statement(**trim_none_values(kwargs)) @@ -115,13 +153,10 @@ def execute_query( statement_id = resp["Id"] - if bool(cluster_identifier) is bool(workgroup_name): - raise ValueError("Either 'cluster_identifier' or 'workgroup_name' must be specified.") - if wait_for_completion: self.wait_for_results(statement_id, poll_interval=poll_interval) - return statement_id + return QueryExecutionOutput(statement_id=statement_id, session_id=resp.get("SessionId")) def wait_for_results(self, statement_id: str, poll_interval: int) -> str: while True: @@ -135,9 +170,9 @@ def wait_for_results(self, statement_id: str, poll_interval: int) -> str: def check_query_is_finished(self, statement_id: str) -> bool: """Check whether query finished, raise exception is failed.""" resp = self.conn.describe_statement(Id=statement_id) - return self.parse_statement_resposne(resp) + return self.parse_statement_response(resp) - def parse_statement_resposne(self, resp: DescribeStatementResponseTypeDef) -> bool: + def parse_statement_response(self, resp: DescribeStatementResponseTypeDef) -> bool: """Parse the response of describe_statement.""" status = resp["Status"] if status == FINISHED_STATE: @@ -179,8 +214,10 @@ def get_table_primary_key( :param table: Name of the target table :param database: the name of the database :param
schema: Name of the target schema, public by default - :param sql: the SQL statement or list of SQL statement to run :param cluster_identifier: unique identifier of a cluster + :param workgroup_name: name of the Redshift Serverless workgroup. Mutually exclusive with + `cluster_identifier`. Specify this parameter to query Redshift Serverless. More info + https://docs.aws.amazon.com/redshift/latest/mgmt/working-with-serverless.html :param db_user: the database username :param secret_arn: the name or ARN of the secret that enables db access :param statement_name: the name of the SQL statement @@ -212,7 +249,8 @@ def get_table_primary_key( with_event=with_event, wait_for_completion=wait_for_completion, poll_interval=poll_interval, - ) + ).statement_id + pk_columns = [] token = "" while True: @@ -251,4 +289,4 @@ async def check_query_is_finished_async(self, statement_id: str) -> bool: """ async with self.async_conn as client: resp = await client.describe_statement(Id=statement_id) - return self.parse_statement_resposne(resp) + return self.parse_statement_response(resp) diff --git a/airflow/providers/amazon/aws/hooks/sagemaker.py b/airflow/providers/amazon/aws/hooks/sagemaker.py index af131697a5e8d2..2c0f4fb25edc51 100644 --- a/airflow/providers/amazon/aws/hooks/sagemaker.py +++ b/airflow/providers/amazon/aws/hooks/sagemaker.py @@ -155,6 +155,7 @@ class SageMakerHook(AwsBaseHook): endpoint_non_terminal_states = {"Creating", "Updating", "SystemUpdating", "RollingBack", "Deleting"} pipeline_non_terminal_states = {"Executing", "Stopping"} failed_states = {"Failed"} + training_failed_states = {*failed_states, "Stopped"} def __init__(self, *args, **kwargs): super().__init__(client_type="sagemaker", *args, **kwargs) @@ -309,7 +310,7 @@ def create_training_job( self.check_training_status_with_log( config["TrainingJobName"], self.non_terminal_states, - self.failed_states, + self.training_failed_states, wait_for_completion, check_interval, max_ingestion_time, diff --git a/airflow/providers/amazon/aws/operators/redshift_data.py b/airflow/providers/amazon/aws/operators/redshift_data.py index 45fee2a919483e..3d00c6d22edf74 100644 --- a/airflow/providers/amazon/aws/operators/redshift_data.py +++ b/airflow/providers/amazon/aws/operators/redshift_data.py @@ -56,13 +56,16 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]): :param workgroup_name: name of the Redshift Serverless workgroup. Mutually exclusive with `cluster_identifier`. Specify this parameter to query Redshift Serverless. More info https://docs.aws.amazon.com/redshift/latest/mgmt/working-with-serverless.html + :param session_id: the session identifier of the query + :param session_keep_alive_seconds: duration in seconds to keep the session alive after the query + finishes. The maximum time a session can keep alive is 24 hours :param aws_conn_id: The Airflow connection used for AWS credentials. If this is ``None`` or empty then the default boto3 behaviour is used. If running Airflow in a distributed manner and aws_conn_id is None or empty, then default boto3 configuration would be used (and must be maintained on each worker node). :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used. - :param verify: Whether or not to verify SSL certificates. See: + :param verify: Whether to verify SSL certificates. See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html :param botocore_config: Configuration dictionary (key-values) for botocore client. 
See: https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html @@ -77,6 +80,7 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]): "parameters", "statement_name", "workgroup_name", + "session_id", ) template_ext = (".sql",) template_fields_renderers = {"sql": "sql"} @@ -84,8 +88,8 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]): def __init__( self, - database: str, sql: str | list, + database: str | None = None, cluster_identifier: str | None = None, db_user: str | None = None, parameters: list | None = None, @@ -97,6 +101,8 @@ def __init__( return_sql_result: bool = False, workgroup_name: str | None = None, deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False), + session_id: str | None = None, + session_keep_alive_seconds: int | None = None, **kwargs, ) -> None: super().__init__(**kwargs) @@ -120,6 +126,8 @@ def __init__( self.return_sql_result = return_sql_result self.statement_id: str | None = None self.deferrable = deferrable + self.session_id = session_id + self.session_keep_alive_seconds = session_keep_alive_seconds def execute(self, context: Context) -> GetStatementResultResponseTypeDef | str: """Execute a statement against Amazon Redshift.""" @@ -130,7 +138,7 @@ def execute(self, context: Context) -> GetStatementResultResponseTypeDef | str: if self.deferrable: wait_for_completion = False - self.statement_id = self.hook.execute_query( + query_execution_output = self.hook.execute_query( database=self.database, sql=self.sql, cluster_identifier=self.cluster_identifier, @@ -142,8 +150,15 @@ def execute(self, context: Context) -> GetStatementResultResponseTypeDef | str: with_event=self.with_event, wait_for_completion=wait_for_completion, poll_interval=self.poll_interval, + session_id=self.session_id, + session_keep_alive_seconds=self.session_keep_alive_seconds, ) + self.statement_id = query_execution_output.statement_id + + if query_execution_output.session_id: + self.xcom_push(context, key="session_id", value=query_execution_output.session_id) + if self.deferrable and self.wait_for_completion: is_finished = self.hook.check_query_is_finished(self.statement_id) if not is_finished: diff --git a/airflow/providers/amazon/aws/sensors/dynamodb.py b/airflow/providers/amazon/aws/sensors/dynamodb.py index dbb7f973041e6f..ead8c123a621ad 100644 --- a/airflow/providers/amazon/aws/sensors/dynamodb.py +++ b/airflow/providers/amazon/aws/sensors/dynamodb.py @@ -18,6 +18,8 @@ from typing import TYPE_CHECKING, Any, Iterable, Sequence +from botocore.exceptions import ClientError + from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor from airflow.providers.amazon.aws.utils.mixins import aws_template_fields @@ -102,14 +104,26 @@ def poke(self, context: Context) -> bool: table = self.hook.conn.Table(self.table_name) self.log.info("Table: %s", table) self.log.info("Key: %s", key) - response = table.get_item(Key=key) + try: - item_attribute_value = response["Item"][self.attribute_name] - self.log.info("Response: %s", response) - self.log.info("Want: %s = %s", self.attribute_name, self.attribute_value) - self.log.info("Got: {response['Item'][self.attribute_name]} = %s", item_attribute_value) - return item_attribute_value in ( - [self.attribute_value] if isinstance(self.attribute_value, str) else self.attribute_value + response = table.get_item(Key=key) + except ClientError as err: + self.log.error( + "Couldn't get %s from table %s.\nError 
Code: %s\nError Message: %s", + key, + self.table_name, + err.response["Error"]["Code"], + err.response["Error"]["Message"], ) - except KeyError: return False + else: + try: + item_attribute_value = response["Item"][self.attribute_name] + self.log.info("Response: %s", response) + self.log.info("Want: %s = %s", self.attribute_name, self.attribute_value) + self.log.info("Got: %s = %s", self.attribute_name, item_attribute_value) + return item_attribute_value in ( + [self.attribute_value] if isinstance(self.attribute_value, str) else self.attribute_value + ) + except KeyError: + return False diff --git a/airflow/providers/amazon/aws/sensors/sagemaker.py b/airflow/providers/amazon/aws/sensors/sagemaker.py index b01e24cd5b8158..af07c504aa29d0 100644 --- a/airflow/providers/amazon/aws/sensors/sagemaker.py +++ b/airflow/providers/amazon/aws/sensors/sagemaker.py @@ -238,7 +238,7 @@ def non_terminal_states(self): return SageMakerHook.non_terminal_states def failed_states(self): - return SageMakerHook.failed_states + return SageMakerHook.training_failed_states def get_sagemaker_response(self): if self.print_log: diff --git a/airflow/providers/amazon/aws/transfers/redshift_to_s3.py b/airflow/providers/amazon/aws/transfers/redshift_to_s3.py index ef3cebdae98389..8538b1dfc313cc 100644 --- a/airflow/providers/amazon/aws/transfers/redshift_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/redshift_to_s3.py @@ -45,7 +45,8 @@ class RedshiftToS3Operator(BaseOperator): :param s3_key: reference to a specific S3 key. If ``table_as_file_name`` is set to False, this param must include the desired file name :param schema: reference to a specific schema in redshift database, - used when ``table`` param provided and ``select_query`` param not provided + used when ``table`` param provided and ``select_query`` param not provided. + Do not provide when unloading a temporary table :param table: reference to a specific table in redshift database, used when ``schema`` param provided and ``select_query`` param not provided :param select_query: custom select query to fetch data from redshift database, @@ -55,8 +56,8 @@ class RedshiftToS3Operator(BaseOperator): If the AWS connection contains 'aws_iam_role' in ``extras`` the operator will use AWS STS credentials with a token https://docs.aws.amazon.com/redshift/latest/dg/copy-parameters-authorization.html#copy-credentials - :param verify: Whether or not to verify SSL certificates for S3 connection. - By default SSL certificates are verified. + :param verify: Whether to verify SSL certificates for S3 connection. + By default, SSL certificates are verified. You can provide the following values: - ``False``: do not validate SSL certificates. SSL will still be used (unless use_ssl is False), but SSL certificates will not be verified. - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. :param unload_options: reference to a list of UNLOAD options :param autocommit: If set to True it will automatically commit the UNLOAD statement. - Otherwise it will be committed right before the redshift connection gets closed. + Otherwise, it will be committed right before the redshift connection gets closed. :param include_header: If set to True the s3 file contains the header columns. :param parameters: (optional) the parameters to render the SQL query with. :param table_as_file_name: If set to True, the s3 file will be named as the table.
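Editor's note: to make the session-reuse flow from the Amazon provider changelog above more concrete, here is a rough usage sketch (not part of the diff; the task IDs, database, workgroup name, and SQL are illustrative assumptions). The first task opens a Redshift Data API session and keeps it alive; the second reuses it through the ``session_id`` XCom that ``RedshiftDataOperator`` now pushes:

    from airflow.providers.amazon.aws.operators.redshift_data import RedshiftDataOperator

    create_tmp = RedshiftDataOperator(
        task_id="create_tmp_table",
        sql="CREATE TEMP TABLE tmp_sales (id INT);",
        database="dev",  # assumed database name
        workgroup_name="my-serverless-workgroup",  # assumed Redshift Serverless workgroup
        session_keep_alive_seconds=600,  # keep the session open for the follow-up task
    )

    reuse_session = RedshiftDataOperator(
        task_id="insert_into_tmp_table",
        sql="INSERT INTO tmp_sales VALUES (1);",
        # session_id is a templated field, so it can be pulled from the previous task's XCom
        session_id="{{ ti.xcom_pull(task_ids='create_tmp_table', key='session_id') }}",
    )

    create_tmp >> reuse_session

Because exactly one of ``cluster_identifier``, ``workgroup_name``, or ``session_id`` must be set, the second task passes only the session ID; the temporary table created by the first statement remains visible for the lifetime of that session.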
@@ -141,9 +142,15 @@ def _build_unload_query( @property def default_select_query(self) -> str | None: - if self.schema and self.table: - return f"SELECT * FROM {self.schema}.{self.table}" - return None + if not self.table: + return None + + if self.schema: + table = f"{self.schema}.{self.table}" + else: + # Relevant when unloading a temporary table + table = self.table + return f"SELECT * FROM {table}" def execute(self, context: Context) -> None: if self.table and self.table_as_file_name: @@ -152,9 +159,7 @@ def execute(self, context: Context) -> None: self.select_query = self.select_query or self.default_select_query if self.select_query is None: - raise ValueError( - "Please provide both `schema` and `table` params or `select_query` to fetch the data." - ) + raise ValueError("Please specify either a table or `select_query` to fetch the data.") if self.include_header and "HEADER" not in [uo.upper().strip() for uo in self.unload_options]: self.unload_options = [*self.unload_options, "HEADER"] diff --git a/airflow/providers/amazon/aws/transfers/s3_to_redshift.py b/airflow/providers/amazon/aws/transfers/s3_to_redshift.py index 127ee07a60bbde..792119bfebb557 100644 --- a/airflow/providers/amazon/aws/transfers/s3_to_redshift.py +++ b/airflow/providers/amazon/aws/transfers/s3_to_redshift.py @@ -28,7 +28,6 @@ if TYPE_CHECKING: from airflow.utils.context import Context - AVAILABLE_METHODS = ["APPEND", "REPLACE", "UPSERT"] @@ -40,17 +39,18 @@ class S3ToRedshiftOperator(BaseOperator): For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:S3ToRedshiftOperator` - :param schema: reference to a specific schema in redshift database :param table: reference to a specific table in redshift database :param s3_bucket: reference to a specific S3 bucket :param s3_key: key prefix that selects single or multiple objects from S3 + :param schema: reference to a specific schema in redshift database. + Do not provide when copying into a temporary table :param redshift_conn_id: reference to a specific redshift database OR a redshift data-api connection :param aws_conn_id: reference to a specific S3 connection If the AWS connection contains 'aws_iam_role' in ``extras`` the operator will use AWS STS credentials with a token https://docs.aws.amazon.com/redshift/latest/dg/copy-parameters-authorization.html#copy-credentials - :param verify: Whether or not to verify SSL certificates for S3 connection. - By default SSL certificates are verified. + :param verify: Whether to verify SSL certificates for S3 connection. + By default, SSL certificates are verified. You can provide the following values: - ``False``: do not validate SSL certificates. 
SSL will still be used @@ -87,10 +87,10 @@ class S3ToRedshiftOperator(BaseOperator): def __init__( self, *, - schema: str, table: str, s3_bucket: str, s3_key: str, + schema: str | None = None, redshift_conn_id: str = "redshift_default", aws_conn_id: str | None = "aws_default", verify: bool | str | None = None, @@ -160,7 +160,7 @@ def execute(self, context: Context) -> None: credentials_block = build_credentials_block(credentials) copy_options = "\n\t\t\t".join(self.copy_options) - destination = f"{self.schema}.{self.table}" + destination = f"{self.schema}.{self.table}" if self.schema else self.table copy_destination = f"#{self.table}" if self.method == "UPSERT" else destination copy_statement = self._build_copy_query( diff --git a/airflow/providers/amazon/aws/utils/openlineage.py b/airflow/providers/amazon/aws/utils/openlineage.py index db472a3e46c5f3..be5703e2f6e801 100644 --- a/airflow/providers/amazon/aws/utils/openlineage.py +++ b/airflow/providers/amazon/aws/utils/openlineage.py @@ -86,7 +86,9 @@ def get_facets_from_redshift_table( ] ) else: - statement_id = redshift_hook.execute_query(sql=sql, poll_interval=1, **redshift_data_api_kwargs) + statement_id = redshift_hook.execute_query( + sql=sql, poll_interval=1, **redshift_data_api_kwargs + ).statement_id response = redshift_hook.conn.get_statement_result(Id=statement_id) table_schema = SchemaDatasetFacet( diff --git a/airflow/providers/amazon/aws/utils/task_log_fetcher.py b/airflow/providers/amazon/aws/utils/task_log_fetcher.py index 83c42f685792a7..5a344b507e8ce3 100644 --- a/airflow/providers/amazon/aws/utils/task_log_fetcher.py +++ b/airflow/providers/amazon/aws/utils/task_log_fetcher.py @@ -70,7 +70,7 @@ def run(self) -> None: # timestamp) # When a slight delay is added before logging the event, that solves the issue # See https://github.com/apache/airflow/issues/40875 - time.sleep(0.1) + time.sleep(0.001) self.logger.info(self.event_to_str(log_event)) prev_timestamp_event = current_timestamp_event diff --git a/airflow/providers/apache/beam/operators/beam.py b/airflow/providers/apache/beam/operators/beam.py index a2d9bb9b893345..41c55ede2a5bc8 100644 --- a/airflow/providers/apache/beam/operators/beam.py +++ b/airflow/providers/apache/beam/operators/beam.py @@ -468,6 +468,7 @@ def on_kill(self) -> None: self.dataflow_hook.cancel_job( job_id=self.dataflow_job_id, project_id=self.dataflow_config.project_id, + location=self.dataflow_config.location, ) @@ -573,6 +574,7 @@ def execute_sync(self, context: Context): is_running = self.dataflow_hook.is_job_dataflow_running( name=self.dataflow_config.job_name, variables=self.pipeline_options, + location=self.dataflow_config.location, ) if not is_running: @@ -656,6 +658,7 @@ def on_kill(self) -> None: self.dataflow_hook.cancel_job( job_id=self.dataflow_job_id, project_id=self.dataflow_config.project_id, + location=self.dataflow_config.location, ) @@ -807,6 +810,7 @@ def on_kill(self) -> None: self.dataflow_hook.cancel_job( job_id=self.dataflow_job_id, project_id=self.dataflow_config.project_id, + location=self.dataflow_config.location, ) diff --git a/airflow/providers/common/sql/operators/sql.py b/airflow/providers/common/sql/operators/sql.py index f8b4080d6670d7..fa1539e7250a57 100644 --- a/airflow/providers/common/sql/operators/sql.py +++ b/airflow/providers/common/sql/operators/sql.py @@ -221,7 +221,7 @@ def __init__( sql: str | list[str], autocommit: bool = False, parameters: Mapping | Iterable | None = None, - handler: Callable[[Any], Any] = fetch_all_handler, + handler: Callable[[Any], 
list[tuple] | None] = fetch_all_handler, conn_id: str | None = None, database: str | None = None, split_statements: bool | None = None, diff --git a/airflow/providers/common/sql/operators/sql.pyi b/airflow/providers/common/sql/operators/sql.pyi index f8fa23c37edc78..0a63ccaa7cc388 100644 --- a/airflow/providers/common/sql/operators/sql.pyi +++ b/airflow/providers/common/sql/operators/sql.pyi @@ -89,7 +89,7 @@ class SQLExecuteQueryOperator(BaseSQLOperator): sql: str | list[str], autocommit: bool = False, parameters: Mapping | Iterable | None = None, - handler: Callable[[Any], Any] = ..., + handler: Callable[[Any], list[tuple] | None] = ..., conn_id: str | None = None, database: str | None = None, split_statements: bool | None = None, diff --git a/airflow/providers/edge/cli/__init__.py b/airflow/providers/edge/cli/__init__.py new file mode 100644 index 00000000000000..13a83393a9124b --- /dev/null +++ b/airflow/providers/edge/cli/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/edge/cli/edge_command.py b/airflow/providers/edge/cli/edge_command.py new file mode 100644 index 00000000000000..09998ffe802810 --- /dev/null +++ b/airflow/providers/edge/cli/edge_command.py @@ -0,0 +1,313 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import logging +import os +import platform +import signal +import sys +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from subprocess import Popen +from time import sleep + +import psutil +from lockfile.pidlockfile import read_pid_from_pidfile, remove_existing_pidfile, write_pid_to_pidfile + +from airflow import __version__ as airflow_version, settings +from airflow.api_internal.internal_api_call import InternalApiConfig +from airflow.cli.cli_config import ARG_PID, ARG_VERBOSE, ActionCommand, Arg +from airflow.configuration import conf +from airflow.exceptions import AirflowException +from airflow.providers.edge import __version__ as edge_provider_version +from airflow.providers.edge.models.edge_job import EdgeJob +from airflow.providers.edge.models.edge_logs import EdgeLogs +from airflow.providers.edge.models.edge_worker import EdgeWorker, EdgeWorkerState +from airflow.utils import cli as cli_utils +from airflow.utils.platform import IS_WINDOWS +from airflow.utils.providers_configuration_loader import providers_configuration_loaded +from airflow.utils.state import TaskInstanceState + +logger = logging.getLogger(__name__) +EDGE_WORKER_PROCESS_NAME = "edge-worker" +EDGE_WORKER_HEADER = "\n".join( + [ + r" ____ __ _ __ __", + r" / __/__/ /__ ____ | | /| / /__ ____/ /_____ ____", + r" / _// _ / _ `/ -_) | |/ |/ / _ \/ __/ '_/ -_) __/", + r"/___/\_,_/\_, /\__/ |__/|__/\___/_/ /_/\_\\__/_/", + r" /___/", + r"", + ] +) + + +@providers_configuration_loaded +def force_use_internal_api_on_edge_worker(): + """ + Ensure that the environment is configured for the internal API without needing to declare it outside. + + This is only required for an Edge worker and must be done before the Click CLI wrapper is initiated. + That is because the CLI wrapper will attempt to establish a DB connection, which will fail before the + function call can take effect. In an Edge worker, we need to "patch" the environment before starting.
+ """ + if "airflow" in sys.argv[0] and sys.argv[1:3] == ["edge", "worker"]: + api_url = conf.get("edge", "api_url") + if not api_url: + raise SystemExit("Error: API URL is not configured, please correct configuration.") + logger.info("Starting worker with API endpoint %s", api_url) + # export Edge API to be used for internal API + os.environ["AIRFLOW_ENABLE_AIP_44"] = "True" + os.environ["AIRFLOW__CORE__INTERNAL_API_URL"] = api_url + InternalApiConfig.set_use_internal_api("edge-worker") + # Disable mini-scheduler post task execution and leave next task schedule to core scheduler + os.environ["AIRFLOW__SCHEDULER__SCHEDULE_AFTER_TASK_EXECUTION"] = "False" + + +force_use_internal_api_on_edge_worker() + + +def _hostname() -> str: + if IS_WINDOWS: + return platform.uname().node + else: + return os.uname()[1] + + +def _get_sysinfo() -> dict: + """Produce the sysinfo from worker to post to central site.""" + return { + "airflow_version": airflow_version, + "edge_provider_version": edge_provider_version, + } + + +def _pid_file_path(pid_file: str | None) -> str: + return cli_utils.setup_locations(process=EDGE_WORKER_PROCESS_NAME, pid=pid_file)[0] + + +@dataclass +class _Job: + """Holds all information for a task/job to be executed as a bundle.""" + + edge_job: EdgeJob + process: Popen + logfile: Path + logsize: int + """Last size of log file, point of last chunk push.""" + + +class _EdgeWorkerCli: + """Runner instance which executes the Edge Worker.""" + + jobs: list[_Job] = [] + """List of jobs that the worker is running currently.""" + last_hb: datetime | None = None + """Timestamp of last heartbeat sent to server.""" + drain: bool = False + """Flag if job processing should be completed and no new jobs fetched for a graceful stop/shutdown.""" + + def __init__( + self, + pid_file_path: Path, + hostname: str, + queues: list[str] | None, + concurrency: int, + job_poll_interval: int, + heartbeat_interval: int, + ): + self.pid_file_path = pid_file_path + self.job_poll_interval = job_poll_interval + self.hb_interval = heartbeat_interval + self.hostname = hostname + self.queues = queues + self.concurrency = concurrency + + @staticmethod + def signal_handler(sig, frame): + logger.info("Request to shut down Edge Worker received, waiting for jobs to complete.") + _EdgeWorkerCli.drain = True + + def start(self): + """Start the execution in a loop until terminated.""" + try: + self.last_hb = EdgeWorker.register_worker( + self.hostname, EdgeWorkerState.STARTING, self.queues, _get_sysinfo() + ).last_update + except AirflowException as e: + if "404:NOT FOUND" in str(e): + raise SystemExit("Error: API endpoint is not ready, please set [edge] api_enabled=True.") + raise SystemExit(str(e)) + write_pid_to_pidfile(self.pid_file_path) + signal.signal(signal.SIGINT, _EdgeWorkerCli.signal_handler) + try: + while not _EdgeWorkerCli.drain or self.jobs: + self.loop() + + logger.info("Quitting worker, signaling offline state.") + EdgeWorker.set_state(self.hostname, EdgeWorkerState.OFFLINE, 0, _get_sysinfo()) + finally: + remove_existing_pidfile(self.pid_file_path) + + def loop(self): + """Run a loop of scheduling and monitoring tasks.""" + new_job = False + if not _EdgeWorkerCli.drain and len(self.jobs) < self.concurrency: + new_job = self.fetch_job() + self.check_running_jobs() + + if _EdgeWorkerCli.drain or datetime.now().timestamp() - self.last_hb.timestamp() > self.hb_interval: + self.heartbeat() + self.last_hb = datetime.now() + + if not new_job: + self.interruptible_sleep() + + def fetch_job(self) -> bool: + """Fetch
and start a new job from central site.""" + logger.debug("Attempting to fetch a new job...") + edge_job = EdgeJob.reserve_task(self.hostname, self.queues) + if edge_job: + logger.info("Received job: %s", edge_job) + env = os.environ.copy() + env["AIRFLOW__CORE__DATABASE_ACCESS_ISOLATION"] = "True" + env["AIRFLOW__CORE__INTERNAL_API_URL"] = conf.get("edge", "api_url") + env["_AIRFLOW__SKIP_DATABASE_EXECUTOR_COMPATIBILITY_CHECK"] = "1" + process = Popen(edge_job.command, close_fds=True, env=env) + logfile = EdgeLogs.logfile_path(edge_job.key) + self.jobs.append(_Job(edge_job, process, logfile, 0)) + EdgeJob.set_state(edge_job.key, TaskInstanceState.RUNNING) + return True + + logger.info("No new job to process%s", f", {len(self.jobs)} still running" if self.jobs else "") + return False + + def check_running_jobs(self) -> None: + """Check which of the running tasks/jobs are completed and report back.""" + for i in range(len(self.jobs) - 1, -1, -1): + job = self.jobs[i] + job.process.poll() + if job.process.returncode is not None: + self.jobs.remove(job) + if job.process.returncode == 0: + logger.info("Job completed: %s", job.edge_job) + EdgeJob.set_state(job.edge_job.key, TaskInstanceState.SUCCESS) + else: + logger.error("Job failed: %s", job.edge_job) + EdgeJob.set_state(job.edge_job.key, TaskInstanceState.FAILED) + if job.logfile.exists() and job.logfile.stat().st_size > job.logsize: + with job.logfile.open("r") as logfile: + logfile.seek(job.logsize, os.SEEK_SET) + logdata = logfile.read() + EdgeLogs.push_logs( + task=job.edge_job.key, + log_chunk_time=datetime.now(), + log_chunk_data=logdata, + ) + job.logsize += len(logdata) + + def heartbeat(self) -> None: + """Report liveness state of worker to central site with stats.""" + state = ( + (EdgeWorkerState.TERMINATING if _EdgeWorkerCli.drain else EdgeWorkerState.RUNNING) + if self.jobs + else EdgeWorkerState.IDLE + ) + sysinfo = _get_sysinfo() + EdgeWorker.set_state(self.hostname, state, len(self.jobs), sysinfo) + + def interruptible_sleep(self): + """Sleeps but stops sleeping if drain is made.""" + drain_before_sleep = _EdgeWorkerCli.drain + for _ in range(0, self.job_poll_interval * 10): + sleep(0.1) + if drain_before_sleep != _EdgeWorkerCli.drain: + return + + +@cli_utils.action_cli(check_db=False) +@providers_configuration_loaded +def worker(args): + """Start Airflow Edge Worker.""" + print(settings.HEADER) + print(EDGE_WORKER_HEADER) + + edge_worker = _EdgeWorkerCli( + pid_file_path=_pid_file_path(args.pid), + hostname=args.edge_hostname or _hostname(), + queues=args.queues.split(",") if args.queues else None, + concurrency=args.concurrency, + job_poll_interval=conf.getint("edge", "job_poll_interval"), + heartbeat_interval=conf.getint("edge", "heartbeat_interval"), + ) + edge_worker.start() + + +@cli_utils.action_cli(check_db=False) +@providers_configuration_loaded +def stop(args): + """Stop a running Airflow Edge Worker.""" + pid = read_pid_from_pidfile(_pid_file_path(args.pid)) + # Send SIGINT + if pid: + logger.warning("Sending SIGINT to worker pid %i.", pid) + worker_process = psutil.Process(pid) + worker_process.send_signal(signal.SIGINT) + else: + logger.warning("Could not find PID of worker.") + + +ARG_CONCURRENCY = Arg( + ("-c", "--concurrency"), + type=int, + help="The number of worker processes", + default=conf.getint("edge", "worker_concurrency", fallback=8), +) +ARG_QUEUES = Arg( + ("-q", "--queues"), + help="Comma delimited list of queues to serve, serve all queues if not provided.", +) +ARG_EDGE_HOSTNAME = Arg( + ("-H", 
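
Reviewer sketch for the Edge worker CLI defined in this file: the ``worker`` entrypoint above simply wires configuration into ``_EdgeWorkerCli`` and blocks until the drain flag is set. A minimal, illustrative equivalent is shown here; it assumes it runs in the context of this module (it reuses the private helpers) and that the ``[edge]`` config section from this provider is present:

.. code-block:: python

    # Roughly what `airflow edge worker --queues remote --concurrency 4` ends up doing.
    from airflow.configuration import conf

    runner = _EdgeWorkerCli(
        pid_file_path=_pid_file_path(None),
        hostname=_hostname(),
        queues=["remote"],
        concurrency=4,
        job_poll_interval=conf.getint("edge", "job_poll_interval"),
        heartbeat_interval=conf.getint("edge", "heartbeat_interval"),
    )
    runner.start()  # blocks; SIGINT or `airflow edge stop` sets the drain flag for a graceful stop
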
"--edge-hostname"), + help="Set the hostname of worker if you have multiple workers on a single machine", +) +EDGE_COMMANDS: list[ActionCommand] = [ + ActionCommand( + name=worker.__name__, + help=worker.__doc__, + func=worker, + args=( + ARG_CONCURRENCY, + ARG_QUEUES, + ARG_EDGE_HOSTNAME, + ARG_PID, + ARG_VERBOSE, + ), + ), + ActionCommand( + name=stop.__name__, + help=stop.__doc__, + func=stop, + args=( + ARG_PID, + ARG_VERBOSE, + ), + ), +] diff --git a/airflow/providers/fab/auth_manager/fab_auth_manager.py b/airflow/providers/fab/auth_manager/fab_auth_manager.py index 2de8db2f564145..3d0f102650935b 100644 --- a/airflow/providers/fab/auth_manager/fab_auth_manager.py +++ b/airflow/providers/fab/auth_manager/fab_auth_manager.py @@ -342,10 +342,7 @@ def get_permitted_dag_ids( resources.add(resource[len(permissions.RESOURCE_DAG_PREFIX) :]) else: resources.add(resource) - return { - dag.dag_id - for dag in session.execute(select(DagModel.dag_id).where(DagModel.dag_id.in_(resources))) - } + return set(session.scalars(select(DagModel.dag_id).where(DagModel.dag_id.in_(resources)))) @cached_property def security_manager(self) -> FabAirflowSecurityManagerOverride: diff --git a/airflow/providers/google/cloud/hooks/dataflow.py b/airflow/providers/google/cloud/hooks/dataflow.py index 97eaa49b36d523..9e94990f550f86 100644 --- a/airflow/providers/google/cloud/hooks/dataflow.py +++ b/airflow/providers/google/cloud/hooks/dataflow.py @@ -1124,18 +1124,17 @@ def build_dataflow_job_name(job_name: str, append_job_name: bool = True) -> str: return safe_job_name - @_fallback_to_location_from_variables @_fallback_to_project_id_from_variables @GoogleBaseHook.fallback_to_default_project_id def is_job_dataflow_running( self, name: str, project_id: str, - location: str = DEFAULT_DATAFLOW_LOCATION, + location: str | None = None, variables: dict | None = None, ) -> bool: """ - Check if jos is still running in dataflow. + Check if job is still running in dataflow. :param name: The name of the job. :param project_id: Optional, the Google Cloud project ID in which to start a job. @@ -1145,11 +1144,21 @@ def is_job_dataflow_running( """ if variables: warnings.warn( - "The variables parameter has been deprecated. You should pass location using " - "the location parameter.", + "The variables parameter has been deprecated. 
You should pass project_id using " + "the project_id parameter.", AirflowProviderDeprecationWarning, stacklevel=4, ) + + if location is None: + location = DEFAULT_DATAFLOW_LOCATION + warnings.warn( + "The location argument will be become mandatory in future versions, " + f"currently, it defaults to {DEFAULT_DATAFLOW_LOCATION}, please set the location explicitly.", + AirflowProviderDeprecationWarning, + stacklevel=4, + ) + jobs_controller = _DataflowJobsController( dataflow=self.get_conn(), project_number=project_id, diff --git a/airflow/providers/google/cloud/operators/dataflow.py b/airflow/providers/google/cloud/operators/dataflow.py index fd4d0644a77df8..29ac5943edfe3c 100644 --- a/airflow/providers/google/cloud/operators/dataflow.py +++ b/airflow/providers/google/cloud/operators/dataflow.py @@ -432,11 +432,13 @@ def set_current_job_id(job_id): is_running = self.dataflow_hook.is_job_dataflow_running( name=self.job_name, variables=pipeline_options, + location=self.location, ) while is_running and self.check_if_running == CheckJobRunning.WaitForRun: is_running = self.dataflow_hook.is_job_dataflow_running( name=self.job_name, variables=pipeline_options, + location=self.location, ) if not is_running: pipeline_options["jobName"] = job_name diff --git a/airflow/providers/microsoft/azure/operators/msgraph.py b/airflow/providers/microsoft/azure/operators/msgraph.py index 74409f3600a1e7..b3d14b14a57ece 100644 --- a/airflow/providers/microsoft/azure/operators/msgraph.py +++ b/airflow/providers/microsoft/azure/operators/msgraph.py @@ -100,7 +100,7 @@ def __init__( timeout: float | None = None, proxies: dict | None = None, api_version: APIVersion | str | None = None, - pagination_function: Callable[[MSGraphAsyncOperator, dict], tuple[str, dict]] | None = None, + pagination_function: Callable[[MSGraphAsyncOperator, dict, Context], tuple[str, dict]] | None = None, result_processor: Callable[[Context, Any], Any] = lambda context, result: result, serializer: type[ResponseSerializer] = ResponseSerializer, **kwargs: Any, @@ -122,7 +122,6 @@ def __init__( self.pagination_function = pagination_function or self.paginate self.result_processor = result_processor self.serializer: ResponseSerializer = serializer() - self.results: list[Any] | None = None def execute(self, context: Context) -> None: self.defer( @@ -166,6 +165,8 @@ def execute_complete( self.log.debug("response: %s", response) + results = self.pull_xcom(context=context) + if response: response = self.serializer.deserialize(response) @@ -178,39 +179,46 @@ def execute_complete( event["response"] = result try: - self.trigger_next_link(response=response, method_name=self.execute_complete.__name__) + self.trigger_next_link( + response=response, method_name=self.execute_complete.__name__, context=context + ) except TaskDeferred as exception: - self.results = self.pull_xcom(context=context) self.append_result( + results=results, result=result, append_result_as_list_if_absent=True, ) - self.push_xcom(context=context, value=self.results) + self.push_xcom(context=context, value=results) raise exception - self.append_result(result=result) + if not results: + return result - return self.results + self.append_result(results=results, result=result) + return results return None + @classmethod def append_result( - self, + cls, + results: list[Any], result: Any, append_result_as_list_if_absent: bool = False, - ): - if isinstance(self.results, list): + ) -> list[Any]: + if isinstance(results, list): if isinstance(result, list): - self.results.extend(result) + 
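
Note on the ``MSGraphAsyncOperator`` changes above: a custom ``pagination_function`` now receives the task ``Context`` as a third argument, and intermediate results are accumulated via XCom rather than on ``self.results``. A sketch of a pagination callback using the new signature; the connection id and URL are placeholders:

.. code-block:: python

    from __future__ import annotations

    from typing import Any

    from airflow.providers.microsoft.azure.operators.msgraph import MSGraphAsyncOperator
    from airflow.utils.context import Context


    def follow_next_link(
        operator: MSGraphAsyncOperator, response: dict, context: Context
    ) -> tuple[Any, dict[str, Any] | None]:
        # Follow the server-provided @odata.nextLink until the API stops returning one.
        return response.get("@odata.nextLink"), operator.query_parameters


    list_users = MSGraphAsyncOperator(
        task_id="list_users",
        conn_id="msgraph_default",  # placeholder connection id
        url="users",
        pagination_function=follow_next_link,
    )
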
results.extend(result) else: - self.results.append(result) + results.append(result) else: if append_result_as_list_if_absent: if isinstance(result, list): - self.results = result + return result else: - self.results = [result] + return [result] else: - self.results = result + return result + return results def pull_xcom(self, context: Context) -> list: map_index = context["ti"].map_index @@ -251,27 +259,25 @@ def push_xcom(self, context: Context, value) -> None: self.xcom_push(context=context, key=self.key, value=value) @staticmethod - def paginate(operator: MSGraphAsyncOperator, response: dict) -> tuple[Any, dict[str, Any] | None]: + def paginate( + operator: MSGraphAsyncOperator, response: dict, context: Context + ) -> tuple[Any, dict[str, Any] | None]: odata_count = response.get("@odata.count") if odata_count and operator.query_parameters: query_parameters = deepcopy(operator.query_parameters) top = query_parameters.get("$top") - odata_count = response.get("@odata.count") if top and odata_count: - if len(response.get("value", [])) == top: - skip = ( - sum(map(lambda result: len(result["value"]), operator.results)) + top - if operator.results - else top - ) + if len(response.get("value", [])) == top and context: + results = operator.pull_xcom(context=context) + skip = sum(map(lambda result: len(result["value"]), results)) + top if results else top query_parameters["$skip"] = skip return operator.url, query_parameters return response.get("@odata.nextLink"), operator.query_parameters - def trigger_next_link(self, response, method_name="execute_complete") -> None: + def trigger_next_link(self, response, method_name: str, context: Context) -> None: if isinstance(response, dict): - url, query_parameters = self.pagination_function(self, response) + url, query_parameters = self.pagination_function(self, response, context) self.log.debug("url: %s", url) self.log.debug("query_parameters: %s", query_parameters) diff --git a/airflow/providers/openlineage/plugins/adapter.py b/airflow/providers/openlineage/plugins/adapter.py index 8cd6e6c605b45d..51eb649adbf412 100644 --- a/airflow/providers/openlineage/plugins/adapter.py +++ b/airflow/providers/openlineage/plugins/adapter.py @@ -311,7 +311,7 @@ def fail_task( if isinstance(error, BaseException) and error.__traceback__: import traceback - stack_trace = "\\n".join(traceback.format_exception(type(error), error, error.__traceback__)) + stack_trace = "".join(traceback.format_exception(type(error), error, error.__traceback__)) run_facets["errorMessage"] = error_message_run.ErrorMessageRunFacet( message=str(error), programmingLanguage="python", stackTrace=stack_trace ) diff --git a/airflow/serialization/enums.py b/airflow/serialization/enums.py index f216ce73161038..49a3de3d774c44 100644 --- a/airflow/serialization/enums.py +++ b/airflow/serialization/enums.py @@ -71,6 +71,5 @@ class DagAttributeTypes(str, Enum): ARG_NOT_SET = "arg_not_set" TASK_CALLBACK_REQUEST = "task_callback_request" DAG_CALLBACK_REQUEST = "dag_callback_request" - SLA_CALLBACK_REQUEST = "sla_callback_request" TASK_INSTANCE_KEY = "task_instance_key" TRIGGER = "trigger" diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index 12310685ec6929..c9c1f11835277a 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -34,7 +34,7 @@ from pendulum.tz.timezone import FixedTimezone, Timezone from airflow import macros -from airflow.callbacks.callback_requests import DagCallbackRequest, 
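
Note on the OpenLineage adapter fix above: ``traceback.format_exception`` already returns newline-terminated strings, so the old ``"\\n".join(...)`` inserted literal backslash-n sequences into the reported stack trace; an empty-string join restores the normal layout. A quick illustration:

.. code-block:: python

    import traceback

    try:
        1 / 0
    except ZeroDivisionError as error:
        # Each element already ends with "\n", so "".join keeps the usual formatting.
        stack_trace = "".join(traceback.format_exception(type(error), error, error.__traceback__))
        print(stack_trace)
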
SlaCallbackRequest, TaskCallbackRequest +from airflow.callbacks.callback_requests import DagCallbackRequest, TaskCallbackRequest from airflow.compat.functools import cache from airflow.datasets import ( BaseDataset, @@ -758,8 +758,6 @@ def serialize( return cls._encode(var.to_json(), type_=DAT.TASK_CALLBACK_REQUEST) elif isinstance(var, DagCallbackRequest): return cls._encode(var.to_json(), type_=DAT.DAG_CALLBACK_REQUEST) - elif isinstance(var, SlaCallbackRequest): - return cls._encode(var.to_json(), type_=DAT.SLA_CALLBACK_REQUEST) elif var.__class__ == Context: d = {} for k, v in var._context.items(): @@ -890,8 +888,6 @@ def deserialize(cls, encoded_var: Any, use_pydantic_models=False) -> Any: return TaskCallbackRequest.from_json(var) elif type_ == DAT.DAG_CALLBACK_REQUEST: return DagCallbackRequest.from_json(var) - elif type_ == DAT.SLA_CALLBACK_REQUEST: - return SlaCallbackRequest.from_json(var) elif type_ == DAT.TASK_INSTANCE_KEY: return TaskInstanceKey(**var) elif use_pydantic_models and _ENABLE_AIP_44: @@ -1289,7 +1285,7 @@ def populate_operator(cls, op: Operator, encoded_op: dict[str, Any]) -> None: continue elif k == "downstream_task_ids": v = set(v) - elif k in {"retry_delay", "execution_timeout", "sla", "max_retry_delay"}: + elif k in {"retry_delay", "execution_timeout", "max_retry_delay"}: v = cls._deserialize_timedelta(v) elif k in encoded_op["template_fields"]: pass diff --git a/airflow/settings.py b/airflow/settings.py index a242ce4da76945..7a805f64a29c77 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -781,9 +781,6 @@ def is_usage_data_collection_enabled() -> bool: ALLOW_FUTURE_EXEC_DATES = conf.getboolean("scheduler", "allow_trigger_in_future", fallback=False) -# Whether or not to check each dagrun against defined SLAs -CHECK_SLAS = conf.getboolean("core", "check_slas", fallback=True) - USE_JOB_SCHEDULE = conf.getboolean("scheduler", "use_job_schedule", fallback=True) # By default Airflow plugins are lazily-loaded (only loaded when required). Set it to False, diff --git a/airflow/traces/otel_tracer.py b/airflow/traces/otel_tracer.py index 1f87e458d3c1ba..c6d493db1427a2 100644 --- a/airflow/traces/otel_tracer.py +++ b/airflow/traces/otel_tracer.py @@ -199,7 +199,12 @@ def start_span_from_taskinstance( _links.append( Link( - context=trace.get_current_span().get_span_context(), + context=SpanContext( + trace_id=trace.get_current_span().get_span_context().trace_id, + span_id=span_id, + is_remote=True, + trace_flags=TraceFlags(0x01), + ), attributes={"meta.annotation_type": "link", "from": "parenttrace"}, ) ) diff --git a/airflow/ui/openapi-gen/queries/common.ts b/airflow/ui/openapi-gen/queries/common.ts index d942d51c91e9c2..143ec83c556270 100644 --- a/airflow/ui/openapi-gen/queries/common.ts +++ b/airflow/ui/openapi-gen/queries/common.ts @@ -35,19 +35,23 @@ export const useDagServiceGetDagsPublicDagsGetKey = "DagServiceGetDagsPublicDagsGet"; export const UseDagServiceGetDagsPublicDagsGetKeyFn = ( { + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }: { + dagDisplayNamePattern?: string; dagIdPattern?: string; limit?: number; offset?: number; onlyActive?: boolean; orderBy?: string; + owners?: string[]; paused?: boolean; tags?: string[]; } = {}, @@ -55,6 +59,16 @@ export const UseDagServiceGetDagsPublicDagsGetKeyFn = ( ) => [ useDagServiceGetDagsPublicDagsGetKey, ...(queryKey ?? 
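
Note on the ``otel_tracer`` change above: the link is now built from an explicit ``SpanContext`` marked as remote and sampled, instead of linking the current span's context object directly. The same pattern in isolation; here the parent's ``span_id`` is simply reused, whereas the code above derives its own ``span_id``:

.. code-block:: python

    from opentelemetry import trace
    from opentelemetry.trace import Link, SpanContext, TraceFlags

    parent = trace.get_current_span().get_span_context()
    link = Link(
        context=SpanContext(
            trace_id=parent.trace_id,
            span_id=parent.span_id,        # the tracer computes a dedicated span_id instead
            is_remote=True,
            trace_flags=TraceFlags(0x01),  # sampled
        ),
        attributes={"meta.annotation_type": "link", "from": "parenttrace"},
    )
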
[ - { dagIdPattern, limit, offset, onlyActive, orderBy, paused, tags }, + { + dagDisplayNamePattern, + dagIdPattern, + limit, + offset, + onlyActive, + orderBy, + owners, + paused, + tags, + }, ]), ]; diff --git a/airflow/ui/openapi-gen/queries/prefetch.ts b/airflow/ui/openapi-gen/queries/prefetch.ts index 44b8f373534f5d..f8e1bf616d1430 100644 --- a/airflow/ui/openapi-gen/queries/prefetch.ts +++ b/airflow/ui/openapi-gen/queries/prefetch.ts @@ -35,7 +35,9 @@ export const prefetchUseDatasetServiceNextRunDatasetsUiNextRunDatasetsDagIdGet = * @param data.limit * @param data.offset * @param data.tags + * @param data.owners * @param data.dagIdPattern + * @param data.dagDisplayNamePattern * @param data.onlyActive * @param data.paused * @param data.orderBy @@ -45,40 +47,48 @@ export const prefetchUseDatasetServiceNextRunDatasetsUiNextRunDatasetsDagIdGet = export const prefetchUseDagServiceGetDagsPublicDagsGet = ( queryClient: QueryClient, { + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }: { + dagDisplayNamePattern?: string; dagIdPattern?: string; limit?: number; offset?: number; onlyActive?: boolean; orderBy?: string; + owners?: string[]; paused?: boolean; tags?: string[]; } = {}, ) => queryClient.prefetchQuery({ queryKey: Common.UseDagServiceGetDagsPublicDagsGetKeyFn({ + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }), queryFn: () => DagService.getDagsPublicDagsGet({ + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }), diff --git a/airflow/ui/openapi-gen/queries/queries.ts b/airflow/ui/openapi-gen/queries/queries.ts index 55653b622fa07e..9dce528f2a5035 100644 --- a/airflow/ui/openapi-gen/queries/queries.ts +++ b/airflow/ui/openapi-gen/queries/queries.ts @@ -43,7 +43,9 @@ export const useDatasetServiceNextRunDatasetsUiNextRunDatasetsDagIdGet = < * @param data.limit * @param data.offset * @param data.tags + * @param data.owners * @param data.dagIdPattern + * @param data.dagDisplayNamePattern * @param data.onlyActive * @param data.paused * @param data.orderBy @@ -56,19 +58,23 @@ export const useDagServiceGetDagsPublicDagsGet = < TQueryKey extends Array = unknown[], >( { + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }: { + dagDisplayNamePattern?: string; dagIdPattern?: string; limit?: number; offset?: number; onlyActive?: boolean; orderBy?: string; + owners?: string[]; paused?: boolean; tags?: string[]; } = {}, @@ -77,16 +83,28 @@ export const useDagServiceGetDagsPublicDagsGet = < ) => useQuery({ queryKey: Common.UseDagServiceGetDagsPublicDagsGetKeyFn( - { dagIdPattern, limit, offset, onlyActive, orderBy, paused, tags }, + { + dagDisplayNamePattern, + dagIdPattern, + limit, + offset, + onlyActive, + orderBy, + owners, + paused, + tags, + }, queryKey, ), queryFn: () => DagService.getDagsPublicDagsGet({ + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }) as TData, diff --git a/airflow/ui/openapi-gen/queries/suspense.ts b/airflow/ui/openapi-gen/queries/suspense.ts index 1e4fb671a11308..bcc95a53e18ffe 100644 --- a/airflow/ui/openapi-gen/queries/suspense.ts +++ b/airflow/ui/openapi-gen/queries/suspense.ts @@ -44,7 +44,9 @@ export const useDatasetServiceNextRunDatasetsUiNextRunDatasetsDagIdGetSuspense = * @param data.limit * @param data.offset * @param data.tags + * @param data.owners * @param data.dagIdPattern + * @param data.dagDisplayNamePattern * 
@param data.onlyActive * @param data.paused * @param data.orderBy @@ -57,19 +59,23 @@ export const useDagServiceGetDagsPublicDagsGetSuspense = < TQueryKey extends Array = unknown[], >( { + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }: { + dagDisplayNamePattern?: string; dagIdPattern?: string; limit?: number; offset?: number; onlyActive?: boolean; orderBy?: string; + owners?: string[]; paused?: boolean; tags?: string[]; } = {}, @@ -78,16 +84,28 @@ export const useDagServiceGetDagsPublicDagsGetSuspense = < ) => useSuspenseQuery({ queryKey: Common.UseDagServiceGetDagsPublicDagsGetKeyFn( - { dagIdPattern, limit, offset, onlyActive, orderBy, paused, tags }, + { + dagDisplayNamePattern, + dagIdPattern, + limit, + offset, + onlyActive, + orderBy, + owners, + paused, + tags, + }, queryKey, ), queryFn: () => DagService.getDagsPublicDagsGet({ + dagDisplayNamePattern, dagIdPattern, limit, offset, onlyActive, orderBy, + owners, paused, tags, }) as TData, diff --git a/airflow/ui/openapi-gen/requests/services.gen.ts b/airflow/ui/openapi-gen/requests/services.gen.ts index cf28e39ab109f3..e0786e9137156b 100644 --- a/airflow/ui/openapi-gen/requests/services.gen.ts +++ b/airflow/ui/openapi-gen/requests/services.gen.ts @@ -41,7 +41,9 @@ export class DagService { * @param data.limit * @param data.offset * @param data.tags + * @param data.owners * @param data.dagIdPattern + * @param data.dagDisplayNamePattern * @param data.onlyActive * @param data.paused * @param data.orderBy @@ -58,7 +60,9 @@ export class DagService { limit: data.limit, offset: data.offset, tags: data.tags, + owners: data.owners, dag_id_pattern: data.dagIdPattern, + dag_display_name_pattern: data.dagDisplayNamePattern, only_active: data.onlyActive, paused: data.paused, order_by: data.orderBy, diff --git a/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow/ui/openapi-gen/requests/types.gen.ts index 80e90078150eaf..917dca6626c083 100644 --- a/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow/ui/openapi-gen/requests/types.gen.ts @@ -70,13 +70,15 @@ export type NextRunDatasetsUiNextRunDatasetsDagIdGetResponse = { }; export type GetDagsPublicDagsGetData = { + dagDisplayNamePattern?: string | null; dagIdPattern?: string | null; limit?: number; offset?: number; onlyActive?: boolean; orderBy?: string; + owners?: Array; paused?: boolean | null; - tags?: Array | null; + tags?: Array; }; export type GetDagsPublicDagsGetResponse = DAGCollectionResponse; diff --git a/airflow/ui/package.json b/airflow/ui/package.json index 401d920c20cc47..c7d79f792a59e3 100644 --- a/airflow/ui/package.json +++ b/airflow/ui/package.json @@ -12,7 +12,8 @@ "format": "pnpm prettier --write .", "preview": "vite preview", "codegen": "openapi-rq -i \"../api_fastapi/openapi/v1-generated.yaml\" -c axios --format prettier -o openapi-gen", - "test": "vitest run" + "test": "vitest run", + "coverage": "vitest run --coverage" }, "dependencies": { "@chakra-ui/anatomy": "^2.2.2", @@ -41,6 +42,7 @@ "@types/react": "^18.3.5", "@types/react-dom": "^18.3.0", "@vitejs/plugin-react-swc": "^3.7.0", + "@vitest/coverage-v8": "^2.1.1", "eslint": "^9.10.0", "eslint-config-prettier": "^9.1.0", "eslint-plugin-jsx-a11y": "^6.10.0", @@ -56,6 +58,6 @@ "typescript": "~5.5.4", "typescript-eslint": "^8.5.0", "vite": "^5.4.4", - "vitest": "^2.0.5" + "vitest": "^2.1.1" } } diff --git a/airflow/ui/pnpm-lock.yaml b/airflow/ui/pnpm-lock.yaml index 1e4a99ef60769a..0f9f256941f5e8 100644 --- a/airflow/ui/pnpm-lock.yaml +++ 
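
Note on the regenerated UI client above: the DAG list call gained ``owners`` and ``dagDisplayNamePattern`` filters, which the service maps to the ``owners`` and ``dag_display_name_pattern`` query parameters. An equivalent raw request against the FastAPI endpoint, assuming a local API at the base URL and path suggested by the generated client:

.. code-block:: python

    import requests

    response = requests.get(
        "http://localhost:8080/public/dags",  # assumed base URL and path
        params={
            "owners": ["data-platform"],
            "dag_display_name_pattern": "etl",
            "only_active": True,
            "limit": 25,
        },
    )
    response.raise_for_status()
    print(response.json())
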
b/airflow/ui/pnpm-lock.yaml @@ -31,7 +31,7 @@ importers: version: 1.7.7 chakra-react-select: specifier: ^4.9.2 - version: 4.9.2(@chakra-ui/form-control@2.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/icon@3.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/layout@2.3.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/media-query@3.3.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/menu@2.2.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(framer-motion@11.3.29(@emotion/is-prop-valid@1.3.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/spinner@2.1.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + version: 4.9.2(uzcvocchpeesoxvtkif6ppnvaq) framer-motion: specifier: ^11.3.29 version: 11.3.29(@emotion/is-prop-valid@1.3.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -50,7 +50,7 @@ importers: devDependencies: '@7nohe/openapi-react-query-codegen': specifier: ^1.6.0 - version: 1.6.0(commander@12.1.0)(glob@11.0.0)(ts-morph@23.0.0)(typescript@5.5.4) + version: 1.6.0(commander@12.1.0)(glob@11.0.0)(magicast@0.3.5)(ts-morph@23.0.0)(typescript@5.5.4) '@eslint/js': specifier: ^9.10.0 version: 9.10.0 @@ -81,6 +81,9 @@ importers: '@vitejs/plugin-react-swc': specifier: ^3.7.0 version: 3.7.0(vite@5.4.4(@types/node@22.5.4)) + '@vitest/coverage-v8': + specifier: ^2.1.1 + version: 2.1.1(vitest@2.1.1(@types/node@22.5.4)(happy-dom@15.0.0)) eslint: specifier: ^9.10.0 version: 9.10.0(jiti@1.21.6) @@ -127,8 +130,8 @@ importers: specifier: ^5.4.4 version: 5.4.4(@types/node@22.5.4) vitest: - specifier: ^2.0.5 - version: 2.0.5(@types/node@22.5.4)(happy-dom@15.0.0) + specifier: ^2.1.1 + version: 2.1.1(@types/node@22.5.4)(happy-dom@15.0.0) packages: @@ -202,6 +205,11 @@ packages: engines: {node: '>=6.0.0'} hasBin: true + '@babel/parser@7.25.6': + resolution: {integrity: sha512-trGdfBdbD0l1ZPmcJ83eNxB9rbEax4ALFTF7fN386TMYbeCQbyme5cOEXQhbGXKebwGaB/J52w1mrklMcbgy6Q==} + engines: {node: '>=6.0.0'} + hasBin: true + '@babel/runtime@7.25.4': resolution: {integrity: 
sha512-DSgLeL/FNcpXuzav5wfYvHCGvynXkJbn3Zvc3823AEe9nPwW9IK4UoCSS5yGymmQzN0pCPvivtgS6/8U2kkm1w==} engines: {node: '>=6.9.0'} @@ -230,6 +238,13 @@ packages: resolution: {integrity: sha512-zQ1ijeeCXVEh+aNL0RlmkPkG8HUiDcU2pzQQFjtbntgAczRASFzj4H+6+bV+dy1ntKR14I/DypeuRG1uma98iQ==} engines: {node: '>=6.9.0'} + '@babel/types@7.25.6': + resolution: {integrity: sha512-/l42B1qxpG6RdfYf343Uw1vmDjeNhneUXtzhojE7pDgfpEypmRhI6j1kr17XCVv4Cgl9HdAiQY2x0GwKm7rWCw==} + engines: {node: '>=6.9.0'} + + '@bcoe/v8-coverage@0.2.3': + resolution: {integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==} + '@chakra-ui/accordion@2.3.1': resolution: {integrity: sha512-FSXRm8iClFyU+gVaXisOSEw0/4Q+qZbFRiuhIAkVU6Boj0FxAMrlo9a8AV5TuF77rgaHytCdHk0Ng+cyUijrag==} peerDependencies: @@ -953,6 +968,10 @@ packages: resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} engines: {node: '>=12'} + '@istanbuljs/schema@0.1.3': + resolution: {integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==} + engines: {node: '>=8'} + '@jridgewell/gen-mapping@0.3.5': resolution: {integrity: sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==} engines: {node: '>=6.0.0'} @@ -1227,6 +1246,9 @@ packages: '@types/estree@1.0.5': resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==} + '@types/estree@1.0.6': + resolution: {integrity: sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==} + '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} @@ -1346,23 +1368,44 @@ packages: peerDependencies: vite: ^4 || ^5 - '@vitest/expect@2.0.5': - resolution: {integrity: sha512-yHZtwuP7JZivj65Gxoi8upUN2OzHTi3zVfjwdpu2WrvCZPLwsJ2Ey5ILIPccoW23dd/zQBlJ4/dhi7DWNyXCpA==} + '@vitest/coverage-v8@2.1.1': + resolution: {integrity: sha512-md/A7A3c42oTT8JUHSqjP5uKTWJejzUW4jalpvs+rZ27gsURsMU8DEb+8Jf8C6Kj2gwfSHJqobDNBuoqlm0cFw==} + peerDependencies: + '@vitest/browser': 2.1.1 + vitest: 2.1.1 + peerDependenciesMeta: + '@vitest/browser': + optional: true - '@vitest/pretty-format@2.0.5': - resolution: {integrity: sha512-h8k+1oWHfwTkyTkb9egzwNMfJAEx4veaPSnMeKbVSjp4euqGSbQlm5+6VHwTr7u4FJslVVsUG5nopCaAYdOmSQ==} + '@vitest/expect@2.1.1': + resolution: {integrity: sha512-YeueunS0HiHiQxk+KEOnq/QMzlUuOzbU1Go+PgAsHvvv3tUkJPm9xWt+6ITNTlzsMXUjmgm5T+U7KBPK2qQV6w==} - '@vitest/runner@2.0.5': - resolution: {integrity: sha512-TfRfZa6Bkk9ky4tW0z20WKXFEwwvWhRY+84CnSEtq4+3ZvDlJyY32oNTJtM7AW9ihW90tX/1Q78cb6FjoAs+ig==} + '@vitest/mocker@2.1.1': + resolution: {integrity: sha512-LNN5VwOEdJqCmJ/2XJBywB11DLlkbY0ooDJW3uRX5cZyYCrc4PI/ePX0iQhE3BiEGiQmK4GE7Q/PqCkkaiPnrA==} + peerDependencies: + '@vitest/spy': 2.1.1 + msw: ^2.3.5 + vite: ^5.0.0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@2.1.1': + resolution: {integrity: sha512-SjxPFOtuINDUW8/UkElJYQSFtnWX7tMksSGW0vfjxMneFqxVr8YJ979QpMbDW7g+BIiq88RAGDjf7en6rvLPPQ==} - '@vitest/snapshot@2.0.5': - resolution: {integrity: sha512-SgCPUeDFLaM0mIUHfaArq8fD2WbaXG/zVXjRupthYfYGzc8ztbFbu6dUNOblBG7XLMR1kEhS/DNnfCZ2IhdDew==} + '@vitest/runner@2.1.1': + resolution: {integrity: sha512-uTPuY6PWOYitIkLPidaY5L3t0JJITdGTSwBtwMjKzo5O6RCOEncz9PUN+0pDidX8kTHYjO0EwUIvhlGpnGpxmA==} - '@vitest/spy@2.0.5': - 
resolution: {integrity: sha512-c/jdthAhvJdpfVuaexSrnawxZz6pywlTPe84LUB2m/4t3rl2fTo9NFGBG4oWgaD+FTgDDV8hJ/nibT7IfH3JfA==} + '@vitest/snapshot@2.1.1': + resolution: {integrity: sha512-BnSku1WFy7r4mm96ha2FzN99AZJgpZOWrAhtQfoxjUU5YMRpq1zmHRq7a5K9/NjqonebO7iVDla+VvZS8BOWMw==} - '@vitest/utils@2.0.5': - resolution: {integrity: sha512-d8HKbqIcya+GR67mkZbrzhS5kKhtp8dQLcmRZLGTscGVg7yImT82cIrhtn2L8+VujWcy6KZweApgNmPsTAO/UQ==} + '@vitest/spy@2.1.1': + resolution: {integrity: sha512-ZM39BnZ9t/xZ/nF4UwRH5il0Sw93QnZXd9NAZGRpIgj0yvVwPpLd702s/Cx955rGaMlyBQkZJ2Ir7qyY48VZ+g==} + + '@vitest/utils@2.1.1': + resolution: {integrity: sha512-Y6Q9TsI+qJ2CC0ZKj6VBb+T8UPz593N113nnUykqwANqhgf3QkZeHFlusgKLTqrnVHbj/XDKZcDHol+dxVT+rQ==} '@zag-js/dom-query@0.16.0': resolution: {integrity: sha512-Oqhd6+biWyKnhKwFFuZrrf6lxBz2tX2pRQe6grUnYwO6HJ8BcbqZomy2lpOdr+3itlaUqx+Ywj5E5ZZDr/LBfQ==} @@ -2073,6 +2116,10 @@ packages: resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==} engines: {node: '>=10.13.0'} + glob@10.4.5: + resolution: {integrity: sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==} + hasBin: true + glob@11.0.0: resolution: {integrity: sha512-9UiX/Bl6J2yaBbxKoEBRm4Cipxgok8kQYcOPEhScPwebu2I0HoQOuYdIO6S3hLuWoZgpDpwQZMzTFxgpkyT76g==} engines: {node: 20 || >=22} @@ -2149,6 +2196,9 @@ packages: hosted-git-info@2.8.9: resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==} + html-escaper@2.0.2: + resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + human-signals@5.0.0: resolution: {integrity: sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==} engines: {node: '>=16.17.0'} @@ -2306,9 +2356,28 @@ packages: isexe@2.0.0: resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + istanbul-lib-coverage@3.2.2: + resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} + engines: {node: '>=8'} + + istanbul-lib-report@3.0.1: + resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} + engines: {node: '>=10'} + + istanbul-lib-source-maps@5.0.6: + resolution: {integrity: sha512-yg2d+Em4KizZC5niWhQaIomgf5WlL4vOOjZ5xGCmF8SnPE/mDWWXgvRExdcpCgh9lLRRa1/fSYp2ymmbJ1pI+A==} + engines: {node: '>=10'} + + istanbul-reports@3.1.7: + resolution: {integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==} + engines: {node: '>=8'} + iterator.prototype@1.1.2: resolution: {integrity: sha512-DR33HMMr8EzwuRL8Y9D3u2BMj8+RqSE850jfGu59kS7tbmPLzGkZmVSfyCFSDxuZiEY6Rzt3T2NA/qU+NwVj1w==} + jackspeak@3.4.3: + resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} + jackspeak@4.0.1: resolution: {integrity: sha512-cub8rahkh0Q/bw1+GxP7aeSe29hHHn2V4m29nnDlvCdlgU+3UGxkZp7Z53jLUdpX3jdTO0nJZUDl3xvbWc2Xog==} engines: {node: 20 || >=22} @@ -2398,6 +2467,9 @@ packages: loupe@3.1.1: resolution: {integrity: sha512-edNu/8D5MKVfGVFRhFf8aAxiTM6Wumfz5XsaatSxlD3w4R1d/WEKUTydCdPGbl9K7QG/Ca3GnDV2sIKIpXRQcw==} + lru-cache@10.4.3: + resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} + lru-cache@11.0.1: resolution: 
{integrity: sha512-CgeuL5uom6j/ZVrg7G/+1IXqRY8JXX4Hghfy5YE0EhoYQWvndP1kufu58cmZLNIDKnRhZrXfdS9urVWx98AipQ==} engines: {node: 20 || >=22} @@ -2409,6 +2481,13 @@ packages: magic-string@0.30.11: resolution: {integrity: sha512-+Wri9p0QHMy+545hKww7YAu5NyzF8iomPL/RQazugQ9+Ez4Ic3mERMd8ZTX5rfK944j+560ZJi8iAwgak1Ac7A==} + magicast@0.3.5: + resolution: {integrity: sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ==} + + make-dir@4.0.0: + resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} + engines: {node: '>=10'} + memoize-one@6.0.0: resolution: {integrity: sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==} @@ -2616,6 +2695,10 @@ packages: path-parse@1.0.7: resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} + path-scurry@1.11.1: + resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} + engines: {node: '>=16 || 14 >=14.18'} + path-scurry@2.0.0: resolution: {integrity: sha512-ypGJsmGtdXUOeM5u93TyeIEfEhM6s+ljAhrk5vAvSx8uyY/02OvrZnA0YNGUrPXfpJMgI1ODd3nwz8Npx4O4cg==} engines: {node: 20 || >=22} @@ -3008,6 +3091,10 @@ packages: resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} engines: {node: '>=10'} + test-exclude@7.0.1: + resolution: {integrity: sha512-pFYqmTw68LXVjeWJMST4+borgQP2AyMNbg1BpZh9LbyhUeNkeaPF9gzfPGUAnSMV3qPYdWUwDIjjCLiSDOl7vg==} + engines: {node: '>=18'} + text-table@0.2.0: resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==} @@ -3017,6 +3104,9 @@ packages: tinybench@2.9.0: resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + tinyexec@0.3.0: + resolution: {integrity: sha512-tVGE0mVJPGb0chKhqmsoosjsS+qUnJVGJpZgsHYQcGoPlG3B51R3PouqTgEGH2Dc9jjFyOqOpix6ZHNMXp1FZg==} + tinypool@1.0.1: resolution: {integrity: sha512-URZYihUbRPcGv95En+sz6MfghfIc2OJ1sv/RmhWZLouPY0/8Vo80viwPvg3dlaS9fuq7fQMEfgRRK7BBZThBEA==} engines: {node: ^18.0.0 || >=20.0.0} @@ -3025,8 +3115,8 @@ packages: resolution: {integrity: sha512-weEDEq7Z5eTHPDh4xjX789+fHfF+P8boiFB+0vbWzpbnbsEr/GRaohi/uMKxg8RZMXnl1ItAi/IUHWMsjDV7kQ==} engines: {node: '>=14.0.0'} - tinyspy@3.0.0: - resolution: {integrity: sha512-q5nmENpTHgiPVd1cJDDc9cVoYN5x4vCvwT3FMilvKPKneCBZAxn2YWQjDF0UMcE9k0Cay1gBiDfTMU0g+mPMQA==} + tinyspy@3.0.2: + resolution: {integrity: sha512-n1cw8k1k0x4pgA2+9XrOkFydTerNcJ1zWCO5Nn9scWHTD+5tp8dghT2x1uduQePZTZgd3Tupf+x9BxJjeJi77Q==} engines: {node: '>=14.0.0'} to-fast-properties@2.0.0: @@ -3152,8 +3242,8 @@ packages: validate-npm-package-license@3.0.4: resolution: {integrity: sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==} - vite-node@2.0.5: - resolution: {integrity: sha512-LdsW4pxj0Ot69FAoXZ1yTnA9bjGohr2yNBU7QKRxpz8ITSkhuDl6h3zS/tvgz4qrNjeRnvrWeXQ8ZF7Um4W00Q==} + vite-node@2.1.1: + resolution: {integrity: sha512-N/mGckI1suG/5wQI35XeR9rsMsPqKXzq1CdUndzVstBj/HvyxxGctwnK6WX43NGt5L3Z5tcRf83g4TITKJhPrA==} engines: {node: ^18.0.0 || >=20.0.0} hasBin: true @@ -3188,15 +3278,15 @@ packages: terser: optional: true - vitest@2.0.5: - resolution: {integrity: sha512-8GUxONfauuIdeSl5f9GTgVEpg5BTOlplET4WEDaeY2QBiN8wSm68vxN/tb5z405OwppfoCavnwXafiaYBC/xOA==} + vitest@2.1.1: + resolution: {integrity: 
sha512-97We7/VC0e9X5zBVkvt7SGQMGrRtn3KtySFQG5fpaMlS+l62eeXRQO633AYhSTC3z7IMebnPPNjGXVGNRFlxBA==} engines: {node: ^18.0.0 || >=20.0.0} hasBin: true peerDependencies: '@edge-runtime/vm': '*' '@types/node': ^18.0.0 || >=20.0.0 - '@vitest/browser': 2.0.5 - '@vitest/ui': 2.0.5 + '@vitest/browser': 2.1.1 + '@vitest/ui': 2.1.1 happy-dom: '*' jsdom: '*' peerDependenciesMeta: @@ -3274,9 +3364,9 @@ packages: snapshots: - '@7nohe/openapi-react-query-codegen@1.6.0(commander@12.1.0)(glob@11.0.0)(ts-morph@23.0.0)(typescript@5.5.4)': + '@7nohe/openapi-react-query-codegen@1.6.0(commander@12.1.0)(glob@11.0.0)(magicast@0.3.5)(ts-morph@23.0.0)(typescript@5.5.4)': dependencies: - '@hey-api/openapi-ts': 0.52.0(typescript@5.5.4) + '@hey-api/openapi-ts': 0.52.0(magicast@0.3.5)(typescript@5.5.4) commander: 12.1.0 glob: 11.0.0 ts-morph: 23.0.0 @@ -3354,6 +3444,10 @@ snapshots: dependencies: '@babel/types': 7.25.4 + '@babel/parser@7.25.6': + dependencies: + '@babel/types': 7.25.6 + '@babel/runtime@7.25.4': dependencies: regenerator-runtime: 0.14.1 @@ -3406,6 +3500,14 @@ snapshots: '@babel/helper-validator-identifier': 7.24.7 to-fast-properties: 2.0.0 + '@babel/types@7.25.6': + dependencies: + '@babel/helper-string-parser': 7.24.8 + '@babel/helper-validator-identifier': 7.24.7 + to-fast-properties: 2.0.0 + + '@bcoe/v8-coverage@0.2.3': {} + '@chakra-ui/accordion@2.3.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(framer-motion@11.3.29(@emotion/is-prop-valid@1.3.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(react@18.3.1)': dependencies: '@chakra-ui/descendant': 3.1.0(react@18.3.1) @@ -4307,10 +4409,10 @@ snapshots: '@floating-ui/utils@0.2.7': {} - '@hey-api/openapi-ts@0.52.0(typescript@5.5.4)': + '@hey-api/openapi-ts@0.52.0(magicast@0.3.5)(typescript@5.5.4)': dependencies: '@apidevtools/json-schema-ref-parser': 11.6.4 - c12: 1.11.1 + c12: 1.11.1(magicast@0.3.5) camelcase: 8.0.0 commander: 12.1.0 handlebars: 4.7.8 @@ -4331,6 +4433,8 @@ snapshots: wrap-ansi: 8.1.0 wrap-ansi-cjs: wrap-ansi@7.0.0 + '@istanbuljs/schema@0.1.3': {} + '@jridgewell/gen-mapping@0.3.5': dependencies: '@jridgewell/set-array': 1.2.1 @@ -4561,6 +4665,8 @@ snapshots: '@types/estree@1.0.5': {} + '@types/estree@1.0.6': {} + '@types/json-schema@7.0.15': {} '@types/lodash.mergewith@4.6.7': @@ -4718,36 +4824,61 @@ snapshots: transitivePeerDependencies: - '@swc/helpers' - '@vitest/expect@2.0.5': + '@vitest/coverage-v8@2.1.1(vitest@2.1.1(@types/node@22.5.4)(happy-dom@15.0.0))': dependencies: - '@vitest/spy': 2.0.5 - '@vitest/utils': 2.0.5 + '@ampproject/remapping': 2.3.0 + '@bcoe/v8-coverage': 0.2.3 + debug: 4.3.7 + istanbul-lib-coverage: 3.2.2 + istanbul-lib-report: 3.0.1 + istanbul-lib-source-maps: 5.0.6 + istanbul-reports: 3.1.7 + magic-string: 0.30.11 + magicast: 0.3.5 + std-env: 3.7.0 + test-exclude: 7.0.1 + tinyrainbow: 1.2.0 + vitest: 2.1.1(@types/node@22.5.4)(happy-dom@15.0.0) + transitivePeerDependencies: + - supports-color + + '@vitest/expect@2.1.1': + dependencies: + '@vitest/spy': 2.1.1 + '@vitest/utils': 2.1.1 chai: 5.1.1 tinyrainbow: 1.2.0 - '@vitest/pretty-format@2.0.5': + '@vitest/mocker@2.1.1(@vitest/spy@2.1.1)(vite@5.4.4(@types/node@22.5.4))': + dependencies: + '@vitest/spy': 2.1.1 + estree-walker: 3.0.3 + magic-string: 0.30.11 + optionalDependencies: + vite: 5.4.4(@types/node@22.5.4) + + '@vitest/pretty-format@2.1.1': dependencies: tinyrainbow: 1.2.0 - 
'@vitest/runner@2.0.5': + '@vitest/runner@2.1.1': dependencies: - '@vitest/utils': 2.0.5 + '@vitest/utils': 2.1.1 pathe: 1.1.2 - '@vitest/snapshot@2.0.5': + '@vitest/snapshot@2.1.1': dependencies: - '@vitest/pretty-format': 2.0.5 + '@vitest/pretty-format': 2.1.1 magic-string: 0.30.11 pathe: 1.1.2 - '@vitest/spy@2.0.5': + '@vitest/spy@2.1.1': dependencies: - tinyspy: 3.0.0 + tinyspy: 3.0.2 - '@vitest/utils@2.0.5': + '@vitest/utils@2.1.1': dependencies: - '@vitest/pretty-format': 2.0.5 - estree-walker: 3.0.3 + '@vitest/pretty-format': 2.1.1 loupe: 3.1.1 tinyrainbow: 1.2.0 @@ -4919,7 +5050,7 @@ snapshots: builtin-modules@3.3.0: {} - c12@1.11.1: + c12@1.11.1(magicast@0.3.5): dependencies: chokidar: 3.6.0 confbox: 0.1.7 @@ -4933,6 +5064,8 @@ snapshots: perfect-debounce: 1.0.0 pkg-types: 1.1.3 rc9: 2.1.2 + optionalDependencies: + magicast: 0.3.5 cac@6.7.14: {} @@ -4958,8 +5091,8 @@ snapshots: loupe: 3.1.1 pathval: 2.0.0 - ? chakra-react-select@4.9.2(@chakra-ui/form-control@2.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/icon@3.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/layout@2.3.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/media-query@3.3.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/menu@2.2.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(framer-motion@11.3.29(@emotion/is-prop-valid@1.3.0)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/spinner@2.1.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1))(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) - : dependencies: + chakra-react-select@4.9.2(uzcvocchpeesoxvtkif6ppnvaq): + dependencies: '@chakra-ui/form-control': 2.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1) '@chakra-ui/icon': 3.2.0(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1) 
'@chakra-ui/layout': 2.3.1(@chakra-ui/system@2.6.2(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@emotion/styled@11.13.0(@emotion/react@11.13.3(@types/react@18.3.5)(react@18.3.1))(@types/react@18.3.5)(react@18.3.1))(react@18.3.1))(react@18.3.1) @@ -5485,7 +5618,7 @@ snapshots: estree-walker@3.0.3: dependencies: - '@types/estree': 1.0.5 + '@types/estree': 1.0.6 esutils@2.0.3: {} @@ -5638,6 +5771,15 @@ snapshots: dependencies: is-glob: 4.0.3 + glob@10.4.5: + dependencies: + foreground-child: 3.3.0 + jackspeak: 3.4.3 + minimatch: 9.0.5 + minipass: 7.1.2 + package-json-from-dist: 1.0.0 + path-scurry: 1.11.1 + glob@11.0.0: dependencies: foreground-child: 3.3.0 @@ -5716,6 +5858,8 @@ snapshots: hosted-git-info@2.8.9: {} + html-escaper@2.0.2: {} + human-signals@5.0.0: {} ignore@5.3.2: {} @@ -5854,6 +5998,27 @@ snapshots: isexe@2.0.0: {} + istanbul-lib-coverage@3.2.2: {} + + istanbul-lib-report@3.0.1: + dependencies: + istanbul-lib-coverage: 3.2.2 + make-dir: 4.0.0 + supports-color: 7.2.0 + + istanbul-lib-source-maps@5.0.6: + dependencies: + '@jridgewell/trace-mapping': 0.3.25 + debug: 4.3.7 + istanbul-lib-coverage: 3.2.2 + transitivePeerDependencies: + - supports-color + + istanbul-reports@3.1.7: + dependencies: + html-escaper: 2.0.2 + istanbul-lib-report: 3.0.1 + iterator.prototype@1.1.2: dependencies: define-properties: 1.2.1 @@ -5862,6 +6027,12 @@ snapshots: reflect.getprototypeof: 1.0.6 set-function-name: 2.0.2 + jackspeak@3.4.3: + dependencies: + '@isaacs/cliui': 8.0.2 + optionalDependencies: + '@pkgjs/parseargs': 0.11.0 + jackspeak@4.0.1: dependencies: '@isaacs/cliui': 8.0.2 @@ -5938,6 +6109,8 @@ snapshots: dependencies: get-func-name: 2.0.2 + lru-cache@10.4.3: {} + lru-cache@11.0.1: {} lz-string@1.5.0: {} @@ -5946,6 +6119,16 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.0 + magicast@0.3.5: + dependencies: + '@babel/parser': 7.25.6 + '@babel/types': 7.25.6 + source-map-js: 1.2.1 + + make-dir@4.0.0: + dependencies: + semver: 7.6.3 + memoize-one@6.0.0: {} merge-stream@2.0.0: {} @@ -6136,6 +6319,11 @@ snapshots: path-parse@1.0.7: {} + path-scurry@1.11.1: + dependencies: + lru-cache: 10.4.3 + minipass: 7.1.2 + path-scurry@2.0.0: dependencies: lru-cache: 11.0.1 @@ -6580,17 +6768,25 @@ snapshots: mkdirp: 1.0.4 yallist: 4.0.0 + test-exclude@7.0.1: + dependencies: + '@istanbuljs/schema': 0.1.3 + glob: 10.4.5 + minimatch: 9.0.5 + text-table@0.2.0: {} tiny-invariant@1.3.3: {} tinybench@2.9.0: {} + tinyexec@0.3.0: {} + tinypool@1.0.1: {} tinyrainbow@1.2.0: {} - tinyspy@3.0.0: {} + tinyspy@3.0.2: {} to-fast-properties@2.0.0: {} @@ -6716,12 +6912,11 @@ snapshots: spdx-correct: 3.2.0 spdx-expression-parse: 3.0.1 - vite-node@2.0.5(@types/node@22.5.4): + vite-node@2.1.1(@types/node@22.5.4): dependencies: cac: 6.7.14 - debug: 4.3.6 + debug: 4.3.7 pathe: 1.1.2 - tinyrainbow: 1.2.0 vite: 5.4.4(@types/node@22.5.4) transitivePeerDependencies: - '@types/node' @@ -6743,26 +6938,26 @@ snapshots: '@types/node': 22.5.4 fsevents: 2.3.3 - vitest@2.0.5(@types/node@22.5.4)(happy-dom@15.0.0): + vitest@2.1.1(@types/node@22.5.4)(happy-dom@15.0.0): dependencies: - '@ampproject/remapping': 2.3.0 - '@vitest/expect': 2.0.5 - '@vitest/pretty-format': 2.0.5 - '@vitest/runner': 2.0.5 - '@vitest/snapshot': 2.0.5 - '@vitest/spy': 2.0.5 - '@vitest/utils': 2.0.5 + '@vitest/expect': 2.1.1 + '@vitest/mocker': 2.1.1(@vitest/spy@2.1.1)(vite@5.4.4(@types/node@22.5.4)) + '@vitest/pretty-format': 2.1.1 + '@vitest/runner': 2.1.1 + '@vitest/snapshot': 2.1.1 + '@vitest/spy': 2.1.1 + '@vitest/utils': 2.1.1 chai: 
5.1.1 - debug: 4.3.6 - execa: 8.0.1 + debug: 4.3.7 magic-string: 0.30.11 pathe: 1.1.2 std-env: 3.7.0 tinybench: 2.9.0 + tinyexec: 0.3.0 tinypool: 1.0.1 tinyrainbow: 1.2.0 vite: 5.4.4(@types/node@22.5.4) - vite-node: 2.0.5(@types/node@22.5.4) + vite-node: 2.1.1(@types/node@22.5.4) why-is-node-running: 2.3.0 optionalDependencies: '@types/node': 22.5.4 @@ -6770,6 +6965,7 @@ snapshots: transitivePeerDependencies: - less - lightningcss + - msw - sass - sass-embedded - stylus diff --git a/airflow/ui/src/utils/ChakraWrapper.test.tsx b/airflow/ui/src/utils/ChakraWrapper.test.tsx new file mode 100644 index 00000000000000..45508e930eed1f --- /dev/null +++ b/airflow/ui/src/utils/ChakraWrapper.test.tsx @@ -0,0 +1,40 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { render } from "@testing-library/react"; +import { describe, it, expect } from "vitest"; + +import { ChakraWrapper } from "src/utils/ChakraWrapper.tsx"; + +describe("ChakraWrapper", () => { + it("renders children correctly", () => { + const { getByText } = render( + +
<ChakraWrapper>
+        <div>Test Child</div>
+      </ChakraWrapper>
, + ); + + expect(getByText("Test Child")).toBeInTheDocument(); + }); + + it("renders without children", () => { + const { container } = render(); + + expect(container).toBeInTheDocument(); + }); +}); diff --git a/airflow/ui/src/utils/RouterWrapper.test.tsx b/airflow/ui/src/utils/RouterWrapper.test.tsx new file mode 100644 index 00000000000000..b3af0db1439025 --- /dev/null +++ b/airflow/ui/src/utils/RouterWrapper.test.tsx @@ -0,0 +1,40 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { render } from "@testing-library/react"; +import { describe, it, expect } from "vitest"; + +import { RouterWrapper } from "src/utils/RouterWrapper.tsx"; + +describe("RouterWrapper", () => { + it("renders children correctly", () => { + const { getByText } = render( + +
<RouterWrapper>
+        <div>Test Child</div>
+      </RouterWrapper>
, + ); + + expect(getByText("Test Child")).toBeInTheDocument(); + }); + + it("renders without children", () => { + const { container } = render(); + + expect(container).toBeInTheDocument(); + }); +}); diff --git a/airflow/ui/vite.config.ts b/airflow/ui/vite.config.ts index 17f03d0ee3fb70..06ad450f377a17 100644 --- a/airflow/ui/vite.config.ts +++ b/airflow/ui/vite.config.ts @@ -35,6 +35,9 @@ export default defineConfig({ ], resolve: { alias: { openapi: "/openapi-gen", src: "/src" } }, test: { + coverage: { + include: ["src/**/*.ts", "src/**/*.tsx"], + }, css: true, environment: "happy-dom", globals: true, diff --git a/airflow/utils/sqlalchemy.py b/airflow/utils/sqlalchemy.py index 33822216f4db50..c81ecf037a62e6 100644 --- a/airflow/utils/sqlalchemy.py +++ b/airflow/utils/sqlalchemy.py @@ -41,6 +41,7 @@ from sqlalchemy.sql.expression import ColumnOperators from sqlalchemy.types import TypeEngine + log = logging.getLogger(__name__) diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 81759f924dd7ab..e4365d0a93caf4 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -20,7 +20,7 @@ apiVersion: v2 name: airflow version: 1.16.0-dev -appVersion: 2.10.1 +appVersion: 2.10.2 description: The official Helm chart to deploy Apache Airflow, a platform to programmatically author, schedule, and monitor workflows home: https://airflow.apache.org/ @@ -47,23 +47,23 @@ annotations: url: https://airflow.apache.org/docs/helm-chart/1.16.0/ artifacthub.io/screenshots: | - title: DAGs View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/dags.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/dags.png - title: Datasets View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/datasets.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/datasets.png - title: Grid View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/grid.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/grid.png - title: Graph View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/graph.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/graph.png - title: Calendar View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/calendar.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/calendar.png - title: Variable View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/variable_hidden.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/variable_hidden.png - title: Gantt Chart - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/gantt.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/gantt.png - title: Task Duration - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/duration.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/duration.png - title: Code View - url: https://airflow.apache.org/docs/apache-airflow/2.10.1/_images/code.png + url: https://airflow.apache.org/docs/apache-airflow/2.10.2/_images/code.png artifacthub.io/changes: | - description: Add git-sync container lifecycle hooks kind: added diff --git a/chart/newsfragments/42069.significant.rst b/chart/newsfragments/42069.significant.rst deleted file mode 100644 index 01e34850102935..00000000000000 --- a/chart/newsfragments/42069.significant.rst +++ /dev/null @@ -1,3 +0,0 @@ -Default Airflow image is updated to ``2.10.1`` - -The default Airflow image that is used with the 
Chart is now ``2.10.1``, previously it was ``2.9.3``. diff --git a/chart/newsfragments/42406.significant.rst b/chart/newsfragments/42406.significant.rst new file mode 100644 index 00000000000000..fda06ea7b28ff2 --- /dev/null +++ b/chart/newsfragments/42406.significant.rst @@ -0,0 +1,3 @@ +Default Airflow image is updated to ``2.10.2`` + +The default Airflow image that is used with the Chart is now ``2.10.2``, previously it was ``2.9.3``. diff --git a/chart/templates/flower/flower-ingress.yaml b/chart/templates/flower/flower-ingress.yaml index 7c798ad9fbcfb6..1b24d825880691 100644 --- a/chart/templates/flower/flower-ingress.yaml +++ b/chart/templates/flower/flower-ingress.yaml @@ -22,10 +22,11 @@ ################################# {{- if .Values.flower.enabled }} {{- if and (or .Values.ingress.flower.enabled .Values.ingress.enabled) (or (eq .Values.executor "CeleryExecutor") (eq .Values.executor "CeleryKubernetesExecutor")) }} +{{- $fullname := (include "airflow.fullname" .) }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: {{ include "airflow.fullname" . }}-flower-ingress + name: {{ $fullname }}-flower-ingress labels: tier: airflow component: flower-ingress @@ -72,7 +73,7 @@ spec: paths: - backend: service: - name: {{ $.Release.Name }}-flower + name: {{ $fullname }}-flower port: name: flower-ui {{- if $.Values.ingress.flower.path }} diff --git a/chart/templates/jobs/migrate-database-job.yaml b/chart/templates/jobs/migrate-database-job.yaml index d7747970b88d90..297253e871335d 100644 --- a/chart/templates/jobs/migrate-database-job.yaml +++ b/chart/templates/jobs/migrate-database-job.yaml @@ -117,6 +117,9 @@ spec: - name: PYTHONUNBUFFERED value: "1" {{- include "standard_airflow_environment" . | indent 10 }} + {{- if .Values.migrateDatabaseJob.env }} + {{- tpl (toYaml .Values.migrateDatabaseJob.env) $ | nindent 12 }} + {{- end }} resources: {{- toYaml .Values.migrateDatabaseJob.resources | nindent 12 }} volumeMounts: {{- include "airflow_config_mount" . | nindent 12 }} diff --git a/chart/values.schema.json b/chart/values.schema.json index 4a73c3b4bfde96..948f09f3b9a4da 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -78,7 +78,7 @@ "defaultAirflowTag": { "description": "Default airflow tag to deploy.", "type": "string", - "default": "2.10.1", + "default": "2.10.2", "x-docsSection": "Common" }, "defaultAirflowDigest": { @@ -93,7 +93,7 @@ "airflowVersion": { "description": "Airflow version (Used to make some decisions based on Airflow Version being deployed).", "type": "string", - "default": "2.10.1", + "default": "2.10.2", "x-docsSection": "Common" }, "securityContext": { @@ -4651,6 +4651,16 @@ "null" ], "default": 300 + }, + "env": { + "description": "Add additional env vars to migrate database job.", + "items": { + "$ref": "#/definitions/io.k8s.api.core.v1.EnvVar" + }, + "type": "array", + "default": [], + "x-kubernetes-patch-merge-key": "name", + "x-kubernetes-patch-strategy": "merge" } } }, diff --git a/chart/values.yaml b/chart/values.yaml index 5d86c3557bafa9..7bfa733a905b4c 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -68,13 +68,13 @@ airflowHome: /opt/airflow defaultAirflowRepository: apache/airflow # Default airflow tag to deploy -defaultAirflowTag: "2.10.1" +defaultAirflowTag: "2.10.2" # Default airflow digest. 
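
Note on the chart change above: ``migrateDatabaseJob.env`` takes standard Kubernetes ``EnvVar`` entries and is templated into the migration job. A hypothetical override, shown here as the Python-dict equivalent of the YAML values file for brevity:

.. code-block:: python

    # The same structure would normally live in a values.yaml passed via `helm upgrade -f`.
    values_override = {
        "migrateDatabaseJob": {
            "env": [
                {"name": "AIRFLOW__LOGGING__LOGGING_LEVEL", "value": "DEBUG"},
            ],
        },
    }
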
If specified, it takes precedence over tag defaultAirflowDigest: ~ # Airflow version (Used to make some decisions based on Airflow Version being deployed) -airflowVersion: "2.10.1" +airflowVersion: "2.10.2" # Images images: @@ -1236,6 +1236,7 @@ migrateDatabaseJob: # Disable this if you are using ArgoCD for example useHelmHooks: true applyCustomEnv: true + env: [] # rpcServer support is experimental / dev purpose only and will later be renamed _rpcServer: diff --git a/contributing-docs/14_node_environment_setup.rst b/contributing-docs/14_node_environment_setup.rst index 99da685aeb690f..8d98f0860fc8b2 100644 --- a/contributing-docs/14_node_environment_setup.rst +++ b/contributing-docs/14_node_environment_setup.rst @@ -70,6 +70,9 @@ Follow the `pnpm docs `__ to install pnpm locally # Run tests for all .test.ts, test.tsx files pnpm test + # Run coverage + pnpm coverage + # Generate queries and types from the REST API OpenAPI spec pnpm codegen diff --git a/dev/breeze/doc/01_installation.rst b/dev/breeze/doc/01_installation.rst index 6ff68d2bb6455a..7107be900d97b6 100644 --- a/dev/breeze/doc/01_installation.rst +++ b/dev/breeze/doc/01_installation.rst @@ -234,8 +234,19 @@ In case of disk space errors on macOS, increase the disk space available for Doc Installation ============ +First, clone the Airflow repository, but make sure not to clone it into your home directory. Cloning it into your home directory will cause the following error: +``Your Airflow sources are checked out in /Users/username/airflow, which is also your AIRFLOW_HOME where Airflow writes logs and database files. This setup is problematic because Airflow might overwrite or clean up your source code and .git repository.`` + +.. code-block:: bash + + git clone https://github.com/apache/airflow.git + +Set your working directory to the root of this cloned repository. + +.. code-block:: bash + + cd airflow -Set your working directory to root of (this) cloned repository. Run this command to install Breeze (make sure to use ``-e`` flag): .. code-block:: bash diff --git a/docs/apache-airflow-providers-amazon/auth-manager/index.rst b/docs/apache-airflow-providers-amazon/auth-manager/index.rst index 7d9b226037cf38..c01fc5403e2885 100644 --- a/docs/apache-airflow-providers-amazon/auth-manager/index.rst +++ b/docs/apache-airflow-providers-amazon/auth-manager/index.rst @@ -22,9 +22,6 @@ AWS auth manager .. warning:: The AWS auth manager is alpha/experimental at the moment and may be subject to change without warning. -Before reading this, you should be familiar with the concept of auth manager. -See :doc:`apache-airflow:core-concepts/auth-manager`. - The AWS auth manager is an auth manager powered by AWS. It uses two services: * `AWS IAM Identity Center `_ for authentication purposes diff --git a/docs/apache-airflow-providers-amazon/auth-manager/setup/identity-center.rst b/docs/apache-airflow-providers-amazon/auth-manager/setup/identity-center.rst index a134dfe0ddf7c3..ff2dc6295eb83d 100644 --- a/docs/apache-airflow-providers-amazon/auth-manager/setup/identity-center.rst +++ b/docs/apache-airflow-providers-amazon/auth-manager/setup/identity-center.rst @@ -27,55 +27,23 @@ Create resources ================ The AWS auth manager needs two resources in AWS IAM Identity Center: an instance and an application. -You can create them either through the provided CLI command or manually. +You must create them manually. -Create resources with CLI -------------------------- - -..
note:: - The CLI command is not compatible with AWS accounts that are managed through AWS organizations. - If your AWS account is managed through an AWS organization, please follow the - :ref:`manual configuration `. - -.. note:: - To create all necessary resources for the AWS Auth Manager, you can utilize the CLI command provided as part of the - AWS auth manager. Before executing the command, ensure the AWS auth manager is configured as the auth manager - for the Airflow instance. See :doc:`/auth-manager/setup/config`. - -To create the resources, please run the following command: - -.. code-block:: bash - - airflow aws-auth-manager init-identity-center - -The CLI command should exit successfully with the message: :: - - AWS IAM Identity Center resources created successfully. - -If the CLI command exited with an error, please look carefully at the CLI command output to understand which resource(s) -have or have not been created successfully. The resource(s) which have not been successfully created need to be -:ref:`created manually `. - -If the error message below is raised, please create the AWS IAM Identity Center application through the console -following :ref:`these instructions `: :: - - Creation of SAML applications is only supported in AWS console today. Please create the application through the console. - -.. _identity_center_manual_configuration: +Create the instance +------------------- -Create resources manually -------------------------- +The AWS auth manager leverages SAML 2.0 as the underlying technology powering authentication against AWS Identity Center. -Create the instance -~~~~~~~~~~~~~~~~~~~ +There are several instance types, but only Organization level instances can use SAML 2.0 applications. See more details +about instances types `here `_. -Please follow `AWS documentation `_ -to create the AWS IAM Identity Center instance. +Please follow `AWS documentation `_ +to create the AWS IAM Identity Center instance at the organization level. .. _identity_center_manual_configuration_application: Create the application -~~~~~~~~~~~~~~~~~~~~~~ +---------------------- Please follow the instructions below to create the AWS IAM Identity Center application. diff --git a/docs/apache-airflow-providers-amazon/notifications/chime_notifier_howto_guide.rst b/docs/apache-airflow-providers-amazon/notifications/chime_notifier_howto_guide.rst index c10b8cbae41428..a52540fe78282d 100644 --- a/docs/apache-airflow-providers-amazon/notifications/chime_notifier_howto_guide.rst +++ b/docs/apache-airflow-providers-amazon/notifications/chime_notifier_howto_guide.rst @@ -23,10 +23,6 @@ Introduction Chime notifier (:class:`airflow.providers.amazon.aws.notifications.chime.ChimeNotifier`) allows users to send messages to a Chime chat room setup via a webhook using the various ``on_*_callbacks`` at both the DAG level and Task level -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with `sla_miss_callback` the context will contain only values passed to the callback, refer :ref:`sla_miss_callback`. 
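For orientation before the guide's own example section, here is a minimal sketch of wiring the Chime notifier into a DAG-level ``on_failure_callback``; the DAG, connection id, and message shown here are illustrative assumptions rather than content from this guide:

.. code-block:: python

    from datetime import datetime

    from airflow import DAG
    from airflow.operators.empty import EmptyOperator
    from airflow.providers.amazon.aws.notifications.chime import ChimeNotifier

    # Hypothetical DAG: "chime_default" is assumed to be a Chime webhook connection.
    with DAG(
        dag_id="example_chime_failure_callback",
        start_date=datetime(2024, 1, 1),
        schedule=None,
        on_failure_callback=ChimeNotifier(
            chime_conn_id="chime_default",
            message="A DAG run failed",
        ),
    ):
        EmptyOperator(task_id="noop")

The same notifier instance can also be passed to a task's ``on_failure_callback`` for task-level notifications; the full, supported usage is shown in the example section that follows.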
Example Code: ------------- diff --git a/docs/apache-airflow-providers-amazon/notifications/sns.rst b/docs/apache-airflow-providers-amazon/notifications/sns.rst index 337e82cf62eb46..bbaad4f814712c 100644 --- a/docs/apache-airflow-providers-amazon/notifications/sns.rst +++ b/docs/apache-airflow-providers-amazon/notifications/sns.rst @@ -25,11 +25,6 @@ Introduction `Amazon SNS `__ notifier :class:`~airflow.providers.amazon.aws.notifications.sns.SnsNotifier` allows users to push messages to a SNS Topic using the various ``on_*_callbacks`` at both the DAG level and Task level. -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with ``sla_miss_callback`` the context will contain only values passed to the callback, - refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow-providers-amazon/notifications/sqs.rst b/docs/apache-airflow-providers-amazon/notifications/sqs.rst index 4a2232b006a037..6951caa9fdd67e 100644 --- a/docs/apache-airflow-providers-amazon/notifications/sqs.rst +++ b/docs/apache-airflow-providers-amazon/notifications/sqs.rst @@ -25,11 +25,6 @@ Introduction `Amazon SQS `__ notifier :class:`~airflow.providers.amazon.aws.notifications.sqs.SqsNotifier` allows users to push messages to an Amazon SQS Queue using the various ``on_*_callbacks`` at both the DAG level and Task level. -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with ``sla_miss_callback`` the context will contain only values passed to the callback, - refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst index 0b314d34f31930..2638e1732cd6c2 100644 --- a/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst +++ b/docs/apache-airflow-providers-amazon/operators/redshift/redshift_data.rst @@ -54,6 +54,18 @@ the necessity of a Postgres connection. :start-after: [START howto_operator_redshift_data] :end-before: [END howto_operator_redshift_data] +Reuse a session when executing multiple statements +================================================== + +Specify the ``session_keep_alive_seconds`` parameter on an upstream task. In a downstream task, get the session ID from +the XCom and pass it to the ``session_id`` parameter. This is useful when you work with temporary tables. + +.. exampleinclude:: /../../tests/system/providers/amazon/aws/example_redshift.py + :language: python + :dedent: 4 + :start-after: [START howto_operator_redshift_data_session_reuse] + :end-before: [END howto_operator_redshift_data_session_reuse] + Reference --------- diff --git a/docs/apache-airflow-providers-google/operators/cloud/cloud_sql.rst b/docs/apache-airflow-providers-google/operators/cloud/cloud_sql.rst index ec334c08955135..42a32712867d96 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/cloud_sql.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/cloud_sql.rst @@ -180,15 +180,6 @@ it will be retrieved from the Google Cloud connection used. Both variants are sh :start-after: [START howto_operator_cloudsql_delete] :end-before: [END howto_operator_cloudsql_delete] -Note: If the instance has read or failover replicas you need to delete them before you delete the primary instance. -Replicas are deleted the same way as primary instances: - -.. 
exampleinclude:: /../../tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py - :language: python - :dedent: 4 - :start-after: [START howto_operator_cloudsql_replicas_delete] - :end-before: [END howto_operator_cloudsql_replicas_delete] - Templating """""""""" @@ -393,16 +384,6 @@ Example body defining the instance with failover replica: :start-after: [START howto_operator_cloudsql_create_body] :end-before: [END howto_operator_cloudsql_create_body] -Example body defining read replica for the instance above: - -.. exampleinclude:: /../../tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py - :language: python - :start-after: [START howto_operator_cloudsql_create_replica] - :end-before: [END howto_operator_cloudsql_create_replica] - -Note: Failover replicas are created together with the instance in a single task. -Read replicas need to be created in separate tasks. - Using the operator """""""""""""""""" diff --git a/docs/apache-airflow-providers-pagerduty/notifications/pagerduty_notifier_howto_guide.rst b/docs/apache-airflow-providers-pagerduty/notifications/pagerduty_notifier_howto_guide.rst index d93d5a2fc5757d..d16f9b2b9e48a5 100644 --- a/docs/apache-airflow-providers-pagerduty/notifications/pagerduty_notifier_howto_guide.rst +++ b/docs/apache-airflow-providers-pagerduty/notifications/pagerduty_notifier_howto_guide.rst @@ -23,10 +23,6 @@ Introduction The Pagerduty notifier (:class:`airflow.providers.pagerduty.notifications.pagerduty.PagerdutyNotifier`) allows users to send messages to Pagerduty using the various ``on_*_callbacks`` at both the DAG level and Task level. -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with `sla_miss_callback` the context will contain only values passed to the callback, refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow-providers-slack/notifications/slack_notifier_howto_guide.rst b/docs/apache-airflow-providers-slack/notifications/slack_notifier_howto_guide.rst index d967779cee9c5c..a4f891f8a57bb9 100644 --- a/docs/apache-airflow-providers-slack/notifications/slack_notifier_howto_guide.rst +++ b/docs/apache-airflow-providers-slack/notifications/slack_notifier_howto_guide.rst @@ -23,10 +23,6 @@ Introduction Slack notifier (:class:`airflow.providers.slack.notifications.slack.SlackNotifier`) allows users to send messages to a slack channel using the various ``on_*_callbacks`` at both the DAG level and Task level -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with `sla_miss_callback` the context will contain only values passed to the callback, refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow-providers-slack/notifications/slackwebhook_notifier_howto_guide.rst b/docs/apache-airflow-providers-slack/notifications/slackwebhook_notifier_howto_guide.rst index bb9e85c67466ff..66ced818a7d188 100644 --- a/docs/apache-airflow-providers-slack/notifications/slackwebhook_notifier_howto_guide.rst +++ b/docs/apache-airflow-providers-slack/notifications/slackwebhook_notifier_howto_guide.rst @@ -24,10 +24,6 @@ Slack Incoming Webhook notifier (:class:`airflow.providers.slack.notifications.s allows users to send messages to a slack channel through `Incoming Webhook `__ using the various ``on_*_callbacks`` at both the DAG level and Task level -You can also use a notifier with ``sla_miss_callback``. - -.. 
note:: - When notifiers are used with `sla_miss_callback` the context will contain only values passed to the callback, refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow-providers-smtp/notifications/smtp_notifier_howto_guide.rst b/docs/apache-airflow-providers-smtp/notifications/smtp_notifier_howto_guide.rst index c7183c5e56874e..4cb1bf310e03d2 100644 --- a/docs/apache-airflow-providers-smtp/notifications/smtp_notifier_howto_guide.rst +++ b/docs/apache-airflow-providers-smtp/notifications/smtp_notifier_howto_guide.rst @@ -23,10 +23,6 @@ Introduction The SMTP notifier (:class:`airflow.providers.smtp.notifications.smtp.SmtpNotifier`) allows users to send messages to SMTP servers using the various ``on_*_callbacks`` at both the DAG level and Task level. -You can also use a notifier with ``sla_miss_callback``. - -.. note:: - When notifiers are used with `sla_miss_callback` the context will contain only values passed to the callback, refer :ref:`sla_miss_callback`. Example Code: ------------- diff --git a/docs/apache-airflow/administration-and-deployment/listeners.rst b/docs/apache-airflow/administration-and-deployment/listeners.rst index 34909e225aaa9b..4926b12ed6c6dd 100644 --- a/docs/apache-airflow/administration-and-deployment/listeners.rst +++ b/docs/apache-airflow/administration-and-deployment/listeners.rst @@ -95,6 +95,7 @@ Dataset Events -------------- - ``on_dataset_created`` +- ``on_dataset_alias_created`` - ``on_dataset_changed`` Dataset events occur when Dataset management operations are run. diff --git a/docs/apache-airflow/administration-and-deployment/logging-monitoring/callbacks.rst b/docs/apache-airflow/administration-and-deployment/logging-monitoring/callbacks.rst index a70a876ba347eb..b54071373cf098 100644 --- a/docs/apache-airflow/administration-and-deployment/logging-monitoring/callbacks.rst +++ b/docs/apache-airflow/administration-and-deployment/logging-monitoring/callbacks.rst @@ -46,7 +46,6 @@ Name Description =========================================== ================================================================ ``on_success_callback`` Invoked when the task :ref:`succeeds ` ``on_failure_callback`` Invoked when the task :ref:`fails ` -``sla_miss_callback`` Invoked when a task misses its defined :ref:`SLA ` ``on_retry_callback`` Invoked when the task is :ref:`up for retry ` ``on_execute_callback`` Invoked right before the task begins executing. ``on_skipped_callback`` Invoked when the task is :ref:`running ` and AirflowSkipException raised. diff --git a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst index c8522bee3ba108..61985cecea9b03 100644 --- a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst +++ b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst @@ -164,7 +164,6 @@ Name Descripti Metric with file_path and action tagging. ``dag_processing.processor_timeouts`` Number of file processors that have been killed due to taking too long. Metric with file_path tagging. 
-``dag_processing.sla_callback_count`` Number of SLA callbacks received ``dag_processing.other_callback_count`` Number of non-SLA callbacks received ``dag_processing.file_path_queue_update_count`` Number of times we've scanned the filesystem and queued all existing dags ``dag_file_processor_timeouts`` (DEPRECATED) same behavior as ``dag_processing.processor_timeouts`` @@ -176,9 +175,6 @@ Name Descripti ``scheduler.critical_section_busy`` Count of times a scheduler process tried to get a lock on the critical section (needed to send tasks to the executor) and found it locked by another process. -``sla_missed`` Number of SLA misses. Metric with dag_id and task_id tagging. -``sla_callback_notification_failure`` Number of failed SLA miss callback notification attempts. Metric with dag_id and func_name tagging. -``sla_email_notification_failure`` Number of failed SLA miss email notification attempts. Metric with dag_id tagging. ``ti.start..`` Number of started task in a given dag. Similar to _start but for task ``ti.start`` Number of started task in a given dag. Similar to _start but for task. Metric with dag_id and task_id tagging. diff --git a/docs/apache-airflow/core-concepts/auth-manager.rst b/docs/apache-airflow/core-concepts/auth-manager/index.rst similarity index 96% rename from docs/apache-airflow/core-concepts/auth-manager.rst rename to docs/apache-airflow/core-concepts/auth-manager/index.rst index 521264fd78ba70..b61b44ae39ec40 100644 --- a/docs/apache-airflow/core-concepts/auth-manager.rst +++ b/docs/apache-airflow/core-concepts/auth-manager/index.rst @@ -21,7 +21,7 @@ Auth manager Auth (for authentication/authorization) manager is the component in Airflow to handle user authentication and user authorization. They have a common API and are "pluggable", meaning you can swap auth managers based on your installation needs. -.. image:: ../img/diagram_auth_manager_airflow_architecture.png +.. image:: ../../img/diagram_auth_manager_airflow_architecture.png Airflow can only have one auth manager configured at a time; this is set by the ``auth_manager`` option in the ``[core]`` section of :doc:`the configuration file `. @@ -37,6 +37,22 @@ If you want to check which auth manager is currently set, you can use the $ airflow config get-value core auth_manager airflow.providers.fab.auth_manager.fab_auth_manager.FabAuthManager +Available auth managers to use +------------------------------ + +Here is the list of auth managers available today that you can use in your Airflow environment. + +Provided by Airflow: + +.. toctree:: + :maxdepth: 1 + + simple + +Provided by providers: + +* :doc:`apache-airflow-providers-fab:auth-manager/index` +* :doc:`apache-airflow-providers-amazon:auth-manager/index` Why pluggable auth managers? ---------------------------- diff --git a/docs/apache-airflow/core-concepts/auth-manager/simple.rst b/docs/apache-airflow/core-concepts/auth-manager/simple.rst new file mode 100644 index 00000000000000..bef2e5032f0d77 --- /dev/null +++ b/docs/apache-airflow/core-concepts/auth-manager/simple.rst @@ -0,0 +1,81 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Simple auth manager +=================== + +.. note:: + Before reading this, you should be familiar with the concept of auth manager. + See :doc:`/core-concepts/auth-manager/index`. + +.. warning:: + The simple auth manager is intended to be used for development and testing purposes. It should not be used in a production environment. + +The simple auth manager is the auth manager that comes by default in Airflow 3. As its name suggests, +the logic and implementation of the simple auth manager is **simple**. + +Manage users +------------ + +Users are managed through the `webserver config file `__. +In this file, the list of users is defined in the constant ``SIMPLE_AUTH_MANAGER_USERS``. Example: + +.. code-block:: python + + SIMPLE_AUTH_MANAGER_USERS = [ + { + "username": "admin", + "role": "admin", + } + ] + +Each user needs two pieces of information: + +* **username**. The user's username +* **role**. The role associated with the user. For more information about these roles, :ref:`see next section <roles-permissions>`. + +The password is auto-generated for each user and printed out in the webserver logs. +When generated, these passwords are also saved in your environment, therefore they will not change if you stop or restart your environment. + +.. _roles-permissions: + +Manage roles and permissions +---------------------------- + +There is no option to manage roles and permissions in simple auth manager. They are defined as part of the simple auth manager implementation and cannot be modified. +Here is the list of roles defined in simple auth manager. These roles can be associated with users. + +* **viewer**. Read-only permissions on DAGs, assets and pools +* **user**. **viewer** permissions plus all permissions (edit, create, delete) on DAGs +* **op**. **user** permissions plus all permissions on pools, assets, config, connections and variables +* **admin**. All permissions + +Optional features +----------------- + +Disable authentication and allow everyone as admin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This option allows you to disable authentication and allow everyone as admin. +As a consequence, whoever accesses the Airflow UI is automatically logged in as an admin with all permissions. + +To enable this feature, you need to set the constant ``SIMPLE_AUTH_MANAGER_ALL_ADMINS`` to ``True`` in the `webserver config file `__. +Example: + +.. code-block:: python + + SIMPLE_AUTH_MANAGER_ALL_ADMINS = True diff --git a/docs/apache-airflow/core-concepts/executor/index.rst b/docs/apache-airflow/core-concepts/executor/index.rst index 1bb11f2335ab6e..c7ad952f21d629 100644 --- a/docs/apache-airflow/core-concepts/executor/index.rst +++ b/docs/apache-airflow/core-concepts/executor/index.rst @@ -154,7 +154,7 @@ To make it easier to specify executors on tasks and DAGs, executor configuration .. code-block:: ini [core] - executor = 'LocalExecutor,my.custom.module.ExecutorClass:ShortName' + executor = 'LocalExecutor,ShortName:my.custom.module.ExecutorClass' .. note:: If a DAG specifies a task to use an executor that is not configured, the DAG will fail to parse and a warning dialog will be shown in the Airflow UI.
Please ensure that all executors you wish to use are specified in Airflow configuration on *any* host/container that is running an Airflow component (scheduler, workers, etc). diff --git a/docs/apache-airflow/core-concepts/index.rst b/docs/apache-airflow/core-concepts/index.rst index 47dff5ef73f067..0fded6ba495f4b 100644 --- a/docs/apache-airflow/core-concepts/index.rst +++ b/docs/apache-airflow/core-concepts/index.rst @@ -40,7 +40,7 @@ Here you can find detailed documentation about each one of the core concepts of sensors taskflow executor/index - auth-manager + auth-manager/index objectstorage **Communication** diff --git a/docs/apache-airflow/core-concepts/tasks.rst b/docs/apache-airflow/core-concepts/tasks.rst index 0e05f55bcf5c86..ad03283ef772d5 100644 --- a/docs/apache-airflow/core-concepts/tasks.rst +++ b/docs/apache-airflow/core-concepts/tasks.rst @@ -149,82 +149,11 @@ is periodically executed and rescheduled until it succeeds. mode="reschedule", ) -If you merely want to be notified if a task runs over but still let it run to completion, you want :ref:`concepts:slas` instead. - - -.. _concepts:slas: SLAs ---- -An SLA, or a Service Level Agreement, is an expectation for the maximum time a Task should be completed relative to the Dag Run start time. If a task takes longer than this to run, it is then visible in the "SLA Misses" part of the user interface, as well as going out in an email of all tasks that missed their SLA. - -Tasks over their SLA are not cancelled, though - they are allowed to run to completion. If you want to cancel a task after a certain runtime is reached, you want :ref:`concepts:timeouts` instead. - -To set an SLA for a task, pass a ``datetime.timedelta`` object to the Task/Operator's ``sla`` parameter. You can also supply an ``sla_miss_callback`` that will be called when the SLA is missed if you want to run your own logic. - -If you want to disable SLA checking entirely, you can set ``check_slas = False`` in Airflow's ``[core]`` configuration. - -To read more about configuring the emails, see :doc:`/howto/email-config`. - -.. note:: - - Manually-triggered tasks and tasks in event-driven DAGs will not be checked for an SLA miss. For more information on DAG ``schedule`` values see :doc:`DAG Run `. - -.. _concepts:sla_miss_callback: - -sla_miss_callback -~~~~~~~~~~~~~~~~~ - -You can also supply an ``sla_miss_callback`` that will be called when the SLA is missed if you want to run your own logic. -The function signature of an ``sla_miss_callback`` requires 5 parameters. - -#. ``dag`` - - * Parent :ref:`DAG ` Object for the :doc:`DAGRun ` in which tasks missed their - :ref:`SLA `. - -#. ``task_list`` - - * String list (new-line separated, \\n) of all tasks that missed their :ref:`SLA ` - since the last time that the ``sla_miss_callback`` ran. - -#. ``blocking_task_list`` - - * Any task in the :doc:`DAGRun(s)` (with the same ``execution_date`` as a task that missed - :ref:`SLA `) that is not in a **SUCCESS** state at the time that the ``sla_miss_callback`` - runs. i.e. 'running', 'failed'. These tasks are described as tasks that are blocking itself or another - task from completing before its SLA window is complete. - -#. ``slas`` - - * List of :py:mod:`SlaMiss` objects associated with the tasks in the - ``task_list`` parameter. - -#. ``blocking_tis`` - - * List of the :ref:`TaskInstance ` objects that are associated with the tasks - in the ``blocking_task_list`` parameter. - -Examples of ``sla_miss_callback`` function signature: - -.. 
code-block:: python - - def my_sla_miss_callback(dag, task_list, blocking_task_list, slas, blocking_tis): - ... - -.. code-block:: python - - def my_sla_miss_callback(*args): - ... - -Example DAG: - -.. exampleinclude:: /../../airflow/example_dags/example_sla_dag.py - :language: python - :start-after: [START howto_task_sla] - :end-before: [END howto_task_sla] - +The SLA feature from Airflow 2 has been removed in 3.0 and will be replaced with a new implementation in Airflow 3.1 Special Exceptions ------------------ diff --git a/docs/apache-airflow/public-airflow-interface.rst b/docs/apache-airflow/public-airflow-interface.rst index ee5b80e42fb2c2..2853c6fbe2e1b2 100644 --- a/docs/apache-airflow/public-airflow-interface.rst +++ b/docs/apache-airflow/public-airflow-interface.rst @@ -337,7 +337,7 @@ derived from :class:`~airflow.auth.managers.base_auth_manager.BaseAuthManager`. The auth manager interface itself (the :class:`~airflow.auth.managers.base_auth_manager.BaseAuthManager` class) is public, but the different implementations of auth managers are not (i.e. FabAuthManager). -You can read more about auth managers and how to write your own in :doc:`core-concepts/auth-manager`. +You can read more about auth managers and how to write your own in :doc:`core-concepts/auth-manager/index`. Authentication Backends ----------------------- diff --git a/docs/apache-airflow/redirects.txt b/docs/apache-airflow/redirects.txt index 542840535fb154..8f5bac18abfe36 100644 --- a/docs/apache-airflow/redirects.txt +++ b/docs/apache-airflow/redirects.txt @@ -100,6 +100,7 @@ logging-monitoring/index.rst administration-and-deployment/logging-monitoring/in concepts/index.rst core-concepts/index.rst executor/index.rst core-concepts/executor/index.rst upgrading-from-1-10/index.rst howto/upgrading-from-1-10/index.rst +core-concepts/auth-manager.rst core-concepts/auth-manager/index.rst listeners.rst administration-and-deployment/listeners.rst kubernetes.rst administration-and-deployment/kubernetes.rst diff --git a/docs/conf.py b/docs/conf.py index c87871e7ede6da..4d01e402195a5d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -755,7 +755,6 @@ def _get_params(root_schema: dict, prefix: str = "", default_section: str = "") "*/node_modules/*", "*/migrations/*", "*/contrib/*", - "**/example_sla_dag.py", "**/example_taskflow_api_docker_virtualenv.py", "**/example_dag_decorator.py", ] diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index b80f2b0872f9c7..b834ccc9b20054 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1684,6 +1684,7 @@ tooltip tooltips traceback tracebacks +tracemalloc TrainingPipeline travis triage diff --git a/generated/provider_metadata.json b/generated/provider_metadata.json index 4ca06608c7cab8..a73e3da9f6fce2 100644 --- a/generated/provider_metadata.json +++ b/generated/provider_metadata.json @@ -85,8 +85,12 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.9.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "4.0.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "alibaba": { @@ -179,8 +183,12 @@ "date_released": "2024-05-30T06:38:15Z" }, "2.9.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "2.9.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "amazon": { @@ -433,8 +441,12 @@ "date_released": 
"2024-08-06T20:34:43Z" }, "8.28.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "8.29.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apache.beam": { @@ -567,7 +579,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "5.8.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -649,7 +661,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -751,7 +763,7 @@ "date_released": "2024-08-06T20:34:44Z" }, "2.8.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -877,7 +889,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "3.11.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -927,8 +939,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "1.5.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "1.5.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apache.hdfs": { @@ -1033,8 +1049,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "4.5.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" + }, + "4.5.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apache.hive": { @@ -1215,7 +1235,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "8.2.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -1225,7 +1245,7 @@ "date_released": "2024-05-17T16:07:16Z" }, "1.1.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -1275,8 +1295,12 @@ "date_released": "2024-08-06T20:34:44Z" }, "1.5.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "1.5.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apache.kafka": { @@ -1321,7 +1345,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "1.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -1395,7 +1419,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.7.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -1505,8 +1529,12 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.9.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "3.9.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apache.pig": { @@ -1575,7 +1603,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "4.5.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -1677,7 +1705,7 @@ "date_released": "2024-08-06T20:34:44Z" }, "4.5.0": { - "associated_airflow_version": "2.10.0", + 
"associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -1815,8 +1843,12 @@ "date_released": "2024-07-25T14:17:37Z" }, "4.10.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "4.11.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "apprise": { @@ -1861,7 +1893,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.4.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -1915,7 +1947,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "2.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -1985,7 +2017,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "2.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -2043,7 +2075,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "2.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -2169,8 +2201,12 @@ "date_released": "2024-08-22T10:37:58Z" }, "3.8.1": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-28T10:31:24Z" + }, + "3.8.2": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "cloudant": { @@ -2243,8 +2279,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "4.0.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "cncf.kubernetes": { @@ -2497,8 +2537,12 @@ "date_released": "2024-08-22T10:37:58Z" }, "8.4.1": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-28T10:31:24Z" + }, + "8.4.2": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "cohere": { @@ -2531,7 +2575,7 @@ "date_released": "2024-05-30T06:38:14Z" }, "1.3.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -2545,7 +2589,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.2.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -2581,6 +2625,10 @@ "1.4.0": { "associated_airflow_version": "2.10.0", "date_released": "2024-08-06T20:34:43Z" + }, + "1.4.1": { + "associated_airflow_version": "2.10.0", + "date_released": "2024-09-24T13:49:56Z" } }, "common.sql": { @@ -2709,8 +2757,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.16.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "1.17.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "databricks": { @@ -2875,8 +2927,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "6.9.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "6.10.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "datadog": { @@ -2953,8 +3009,12 @@ "date_released": "2024-05-30T06:38:15Z" }, 
"3.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "3.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "dbt.cloud": { @@ -3067,8 +3127,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.10.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "3.10.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "dingding": { @@ -3137,7 +3201,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -3219,7 +3283,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -3397,8 +3461,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "3.13.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "3.14.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "elasticsearch": { @@ -3559,8 +3627,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "5.5.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "5.5.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "exasol": { @@ -3693,7 +3765,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "4.6.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -3739,8 +3811,12 @@ "date_released": "2024-07-31T14:18:50Z" }, "1.3.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "1.4.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "facebook": { @@ -3829,7 +3905,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -3943,8 +4019,12 @@ "date_released": "2024-08-06T20:34:44Z" }, "3.11.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "3.11.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "github": { @@ -4017,7 +4097,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "2.7.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -4259,8 +4339,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "10.22.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "10.23.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "grpc": { @@ -4341,7 +4425,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -4459,7 +4543,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.8.0": { - "associated_airflow_version": "2.9.2", + 
"associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -4593,8 +4677,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "4.13.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "4.13.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "imap": { @@ -4687,7 +4775,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "3.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -4761,8 +4849,12 @@ "date_released": "2024-07-12T12:38:31Z" }, "2.7.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "2.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "jdbc": { @@ -4863,8 +4955,12 @@ "date_released": "2024-08-06T20:34:44Z" }, "4.5.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "4.5.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "jenkins": { @@ -4965,8 +5061,12 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "3.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "microsoft.azure": { @@ -5195,8 +5295,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "10.4.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "10.5.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "microsoft.mssql": { @@ -5305,8 +5409,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "3.9.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "3.9.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "microsoft.psrp": { @@ -5387,7 +5495,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "2.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -5473,7 +5581,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -5571,8 +5679,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "4.2.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" + }, + "4.2.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "mysql": { @@ -5725,8 +5837,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "5.7.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "5.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "neo4j": { @@ -5815,7 +5931,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -5921,8 +6037,12 @@ "date_released": "2024-08-06T20:34:43Z" }, 
"4.7.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "4.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "openai": { @@ -5951,8 +6071,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "1.3.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "1.4.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "openfaas": { @@ -6017,7 +6141,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -6095,8 +6219,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.11.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-28T10:31:24Z" + }, + "1.12.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "opensearch": { @@ -6129,7 +6257,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "1.4.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -6215,7 +6343,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "5.7.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -6345,7 +6473,7 @@ "date_released": "2024-07-12T12:38:31Z" }, "3.11.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -6439,7 +6567,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.8.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -6537,8 +6665,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.8.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "3.8.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "pgvector": { @@ -6563,7 +6695,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.3.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -6593,7 +6725,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "2.1.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } }, @@ -6743,8 +6875,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "5.12.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" + }, + "5.13.0": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "presto": { @@ -6881,7 +7017,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "5.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -6903,7 +7039,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.2.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -6993,7 +7129,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "3.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", 
"date_released": "2024-08-22T10:37:58Z" } }, @@ -7119,7 +7255,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "5.8.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7201,7 +7337,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "4.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7267,7 +7403,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7341,7 +7477,7 @@ "date_released": "2024-05-30T06:38:14Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7499,8 +7635,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "4.11.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "4.11.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "singularity": { @@ -7573,7 +7713,7 @@ "date_released": "2024-05-30T06:38:16Z" }, "3.6.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7711,7 +7851,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "8.9.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7773,7 +7913,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "1.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -7975,8 +8115,12 @@ "date_released": "2024-08-06T20:34:43Z" }, "5.7.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" + }, + "5.7.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "sqlite": { @@ -8089,7 +8233,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "3.9.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8235,7 +8379,7 @@ "date_released": "2024-08-22T10:37:57Z" }, "3.13.1": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-28T10:31:24Z" } }, @@ -8341,58 +8485,12 @@ "date_released": "2024-06-27T07:50:54Z" }, "4.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" - } - }, - "tabular": { - "1.0.0": { - "associated_airflow_version": "2.3.4", - "date_released": "2022-07-13T20:26:57Z" }, - "1.0.1": { - "associated_airflow_version": "2.4.0", - "date_released": "2022-07-17T09:00:32Z" - }, - "1.1.0": { - "associated_airflow_version": "2.5.0", - "date_released": "2022-11-18T10:44:03Z" - }, - "1.2.0": { - "associated_airflow_version": "2.6.2", - "date_released": "2023-05-23T14:20:25Z" - }, - "1.2.1": { - "associated_airflow_version": "2.6.3", - "date_released": "2023-06-23T15:38:46Z" - }, - "1.3.0": { - "associated_airflow_version": "2.7.3", - "date_released": "2023-10-17T07:49:17Z" - }, - "1.4.0": { - "associated_airflow_version": "2.8.0", - "date_released": "2023-12-12T07:17:13Z" - }, - "1.4.1": { - "associated_airflow_version": "2.8.1", - "date_released": 
"2023-12-27T23:07:27Z" - }, - "1.5.0": { - "associated_airflow_version": "2.8.1", - "date_released": "2024-05-06T08:35:20Z" - }, - "1.5.1": { - "associated_airflow_version": "2.9.2", - "date_released": "2024-05-17T16:07:16Z" - }, - "1.6.0": { - "associated_airflow_version": "2.9.2", - "date_released": "2024-08-22T10:37:58Z" - }, - "1.6.1": { - "associated_airflow_version": "2.9.2", - "date_released": "2024-08-28T10:31:24Z" + "4.6.1": { + "associated_airflow_version": "2.10.1", + "date_released": "2024-09-24T13:49:56Z" } }, "telegram": { @@ -8481,7 +8579,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "4.6.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8515,7 +8613,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "2.6.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8661,7 +8759,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "5.8.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8767,7 +8865,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.9.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8817,7 +8915,7 @@ "date_released": "2024-07-15T11:42:08Z" }, "2.1.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -8915,7 +9013,7 @@ "date_released": "2024-06-27T07:50:54Z" }, "3.12.0": { - "associated_airflow_version": "2.9.3", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:58Z" } }, @@ -8933,7 +9031,7 @@ "date_released": "2024-08-06T20:34:43Z" }, "1.3.0": { - "associated_airflow_version": "2.10.0", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:59Z" } }, @@ -9015,7 +9113,7 @@ "date_released": "2024-05-30T06:38:15Z" }, "4.8.0": { - "associated_airflow_version": "2.9.2", + "associated_airflow_version": "2.10.1", "date_released": "2024-08-22T10:37:57Z" } } diff --git a/helm_tests/airflow_aux/test_migrate_database_job.py b/helm_tests/airflow_aux/test_migrate_database_job.py index 56ac1d1cd50ab5..426a35edc424ec 100644 --- a/helm_tests/airflow_aux/test_migrate_database_job.py +++ b/helm_tests/airflow_aux/test_migrate_database_job.py @@ -455,3 +455,15 @@ def test_overridden_automount_service_account_token(self): show_only=["templates/jobs/migrate-database-job-serviceaccount.yaml"], ) assert jmespath.search("automountServiceAccountToken", docs[0]) is False + + def test_should_add_component_specific_env(self): + env = {"name": "test_env_key", "value": "test_env_value"} + docs = render_chart( + values={ + "migrateDatabaseJob": { + "env": [env], + }, + }, + show_only=["templates/jobs/migrate-database-job.yaml"], + ) + assert env in jmespath.search("spec.template.spec.containers[0].env", docs[0]) diff --git a/helm_tests/webserver/test_ingress_flower.py b/helm_tests/webserver/test_ingress_flower.py index e3d9ff171d16de..107bf5b270f9c4 100644 --- a/helm_tests/webserver/test_ingress_flower.py +++ b/helm_tests/webserver/test_ingress_flower.py @@ -220,3 +220,28 @@ def test_can_ingress_hosts_be_templated(self): "cc.example.com", "dd.example.com", ] == jmespath.search("spec.rules[*].host", docs[0]) + + def test_backend_service_name(self): + docs = render_chart( + values={"ingress": {"enabled": True}, "flower": {"enabled": 
True}}, + show_only=["templates/flower/flower-ingress.yaml"], + ) + + assert "release-name-flower" == jmespath.search( + "spec.rules[0].http.paths[0].backend.service.name", docs[0] + ) + + def test_backend_service_name_with_fullname_override(self): + docs = render_chart( + values={ + "fullnameOverride": "test-basic", + "useStandardNaming": True, + "ingress": {"enabled": True}, + "flower": {"enabled": True}, + }, + show_only=["templates/flower/flower-ingress.yaml"], + ) + + assert "test-basic-flower" == jmespath.search( + "spec.rules[0].http.paths[0].backend.service.name", docs[0] + ) diff --git a/helm_tests/webserver/test_ingress_web.py b/helm_tests/webserver/test_ingress_web.py index 798da6c719594d..38c258c93b9c44 100644 --- a/helm_tests/webserver/test_ingress_web.py +++ b/helm_tests/webserver/test_ingress_web.py @@ -200,3 +200,27 @@ def test_can_ingress_hosts_be_templated(self): "cc.example.com", "dd.example.com", ] == jmespath.search("spec.rules[*].host", docs[0]) + + def test_backend_service_name(self): + docs = render_chart( + values={"ingress": {"web": {"enabled": True}}}, + show_only=["templates/webserver/webserver-ingress.yaml"], + ) + + assert "release-name-webserver" == jmespath.search( + "spec.rules[0].http.paths[0].backend.service.name", docs[0] + ) + + def test_backend_service_name_with_fullname_override(self): + docs = render_chart( + values={ + "fullnameOverride": "test-basic", + "useStandardNaming": True, + "ingress": {"web": {"enabled": True}}, + }, + show_only=["templates/webserver/webserver-ingress.yaml"], + ) + + assert "test-basic-webserver" == jmespath.search( + "spec.rules[0].http.paths[0].backend.service.name", docs[0] + ) diff --git a/newsfragments/42285.significant.rst b/newsfragments/42285.significant.rst new file mode 100644 index 00000000000000..8f8cfa0dee2988 --- /dev/null +++ b/newsfragments/42285.significant.rst @@ -0,0 +1 @@ +The SLA feature is removed in Airflow 3.0, to be replaced with Airflow Alerts in 3.1 diff --git a/newsfragments/42343.feature.rst b/newsfragments/42343.feature.rst new file mode 100644 index 00000000000000..8a7cdf335a06ea --- /dev/null +++ b/newsfragments/42343.feature.rst @@ -0,0 +1 @@ +New function ``create_dataset_aliases`` added to DatasetManager for DatasetAlias creation. diff --git a/newsfragments/42343.significant.rst b/newsfragments/42343.significant.rst new file mode 100644 index 00000000000000..d9e1ba6b1229b0 --- /dev/null +++ b/newsfragments/42343.significant.rst @@ -0,0 +1,7 @@ +``DatasetManager.create_datasets`` now takes ``Dataset`` objects + +This function previously accepted a list of ``DatasetModel`` objects. It now +receives ``Dataset`` objects instead. A list of ``DatasetModel`` objects is +created inside and returned by the function. + +Also, the ``session`` argument is now keyword-only.
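To make the new contract concrete, here is a minimal sketch of the call shape described in this newsfragment; the dataset URI and the ``create_session`` plumbing are illustrative assumptions, not part of the change itself:

.. code-block:: python

    from airflow.datasets import Dataset
    from airflow.datasets.manager import DatasetManager
    from airflow.utils.session import create_session

    manager = DatasetManager()

    with create_session() as session:
        # Pass Dataset objects (not DatasetModel rows) and the session by keyword;
        # the function builds the DatasetModel rows internally and returns them.
        dataset_models = manager.create_datasets(
            [Dataset(uri="s3://my-bucket/my-key")],  # hypothetical dataset URI
            session=session,
        )

Call sites that previously constructed ``DatasetModel`` objects themselves and passed ``session`` positionally need to be updated accordingly.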
diff --git a/pyproject.toml b/pyproject.toml index dff78de57687b2..eba9f7225ad34a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -415,6 +415,9 @@ combine-as-imports = true "tests/providers/openai/triggers/test_openai.py" = ["E402"] "tests/providers/opensearch/conftest.py" = ["E402"] "tests/providers/opensearch/hooks/test_opensearch.py" = ["E402"] +"tests/providers/opensearch/log/test_os_json_formatter.py" = ["E402"] +"tests/providers/opensearch/log/test_os_response.py" = ["E402"] +"tests/providers/opensearch/log/test_os_task_handler.py" = ["E402"] "tests/providers/opensearch/operators/test_opensearch.py" = ["E402"] "tests/providers/qdrant/hooks/test_qdrant.py" = ["E402"] "tests/providers/qdrant/operators/test_qdrant.py" = ["E402"] diff --git a/tests/api_connexion/endpoints/test_xcom_endpoint.py b/tests/api_connexion/endpoints/test_xcom_endpoint.py index 9f2d6525006949..7a51714c5b2993 100644 --- a/tests/api_connexion/endpoints/test_xcom_endpoint.py +++ b/tests/api_connexion/endpoints/test_xcom_endpoint.py @@ -174,6 +174,36 @@ def test_should_respond_200_native(self): "value": {"key": "value"}, } + @conf_vars({("core", "enable_xcom_pickling"): "True"}) + def test_should_respond_200_native_for_pickled(self): + dag_id = "test-dag-id" + task_id = "test-task-id" + execution_date = "2005-04-02T00:00:00+00:00" + xcom_key = "test-xcom-key" + execution_date_parsed = parse_execution_date(execution_date) + run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date_parsed) + value_non_serializable_key = {("201009_NB502104_0421_AHJY23BGXG (SEQ_WF: 138898)", None): 82359} + self._create_xcom_entry( + dag_id, run_id, execution_date_parsed, task_id, xcom_key, {"key": value_non_serializable_key} + ) + response = self.client.get( + f"/api/v1/dags/{dag_id}/dagRuns/{run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}", + environ_overrides={"REMOTE_USER": "test"}, + ) + assert 200 == response.status_code + + current_data = response.json + current_data["timestamp"] = "TIMESTAMP" + assert current_data == { + "dag_id": dag_id, + "execution_date": execution_date, + "key": xcom_key, + "task_id": task_id, + "map_index": -1, + "timestamp": "TIMESTAMP", + "value": f"{{'key': {str(value_non_serializable_key)}}}", + } + def test_should_raise_404_for_non_existent_xcom(self): dag_id = "test-dag-id" task_id = "test-task-id" diff --git a/tests/api_fastapi/views/public/test_dags.py b/tests/api_fastapi/views/public/test_dags.py index 8d83bf1d217c4a..dfba5437a8af36 100644 --- a/tests/api_fastapi/views/public/test_dags.py +++ b/tests/api_fastapi/views/public/test_dags.py @@ -16,7 +16,7 @@ # under the License. 
from __future__ import annotations -from datetime import datetime +from datetime import datetime, timezone import pytest @@ -27,8 +27,10 @@ pytestmark = pytest.mark.db_test -DAG_ID = "test_dag1" +DAG1_ID = "test_dag1" +DAG1_DISPLAY_NAME = "display1" DAG2_ID = "test_dag2" +DAG2_DISPLAY_NAME = "display2" DAG3_ID = "test_dag3" TASK_ID = "op1" @@ -41,6 +43,8 @@ def _create_deactivated_paused_dag(session=None): timetable_summary="2 2 * * *", is_active=False, is_paused=True, + owners="test_owner,another_test_owner", + next_dagrun=datetime(2021, 1, 1, 12, 0, 0, tzinfo=timezone.utc), ) session.add(dag_model) @@ -52,7 +56,8 @@ def setup() -> None: clear_db_serialized_dags() with DAG( - DAG_ID, + DAG1_ID, + dag_display_name=DAG1_DISPLAY_NAME, schedule=None, start_date=datetime(2020, 6, 15), doc_md="details", @@ -61,7 +66,16 @@ def setup() -> None: ) as dag1: EmptyOperator(task_id=TASK_ID) - with DAG(DAG2_ID, schedule=None, start_date=datetime(2020, 6, 15)) as dag2: + with DAG( + DAG2_ID, + dag_display_name=DAG2_DISPLAY_NAME, + schedule=None, + start_date=datetime( + 2020, + 6, + 15, + ), + ) as dag2: EmptyOperator(task_id=TASK_ID) dag1.sync_to_db() @@ -73,15 +87,24 @@ def setup() -> None: @pytest.mark.parametrize( "query_params, expected_total_entries, expected_ids", [ + # Filters ({}, 2, ["test_dag1", "test_dag2"]), ({"limit": 1}, 2, ["test_dag1"]), ({"offset": 1}, 2, ["test_dag2"]), ({"tags": ["example"]}, 1, ["test_dag1"]), - ({"dag_id_pattern": "1"}, 1, ["test_dag1"]), ({"only_active": False}, 3, ["test_dag1", "test_dag2", "test_dag3"]), ({"paused": True, "only_active": False}, 1, ["test_dag3"]), ({"paused": False}, 2, ["test_dag1", "test_dag2"]), + ({"owners": ["airflow"]}, 2, ["test_dag1", "test_dag2"]), + ({"owners": ["test_owner"], "only_active": False}, 1, ["test_dag3"]), + # # Sort ({"order_by": "-dag_id"}, 2, ["test_dag2", "test_dag1"]), + ({"order_by": "-dag_display_name"}, 2, ["test_dag2", "test_dag1"]), + ({"order_by": "dag_display_name"}, 2, ["test_dag1", "test_dag2"]), + ({"order_by": "next_dagrun", "only_active": False}, 3, ["test_dag3", "test_dag1", "test_dag2"]), + # Search + ({"dag_id_pattern": "1"}, 1, ["test_dag1"]), + ({"dag_display_name_pattern": "display2"}, 1, ["test_dag2"]), ], ) def test_get_dags(test_client, query_params, expected_total_entries, expected_ids): diff --git a/tests/auth/managers/simple/test_simple_auth_manager.py b/tests/auth/managers/simple/test_simple_auth_manager.py index 3191069860c6b2..a11c79063d0429 100644 --- a/tests/auth/managers/simple/test_simple_auth_manager.py +++ b/tests/auth/managers/simple/test_simple_auth_manager.py @@ -72,19 +72,26 @@ def test_init_with_users(self, auth_manager_with_appbuilder): assert len(user_passwords_from_file) == 1 @pytest.mark.db_test - def test_is_logged_in(self, auth_manager, app, test_user): + def test_is_logged_in(self, auth_manager_with_appbuilder, app, test_user): with app.test_request_context(): session["user"] = test_user - result = auth_manager.is_logged_in() + result = auth_manager_with_appbuilder.is_logged_in() assert result @pytest.mark.db_test - def test_is_logged_in_return_false_when_no_user_in_session(self, auth_manager, app, test_user): + def test_is_logged_in_return_false_when_no_user_in_session(self, auth_manager_with_appbuilder, app): with app.test_request_context(): - result = auth_manager.is_logged_in() + result = auth_manager_with_appbuilder.is_logged_in() assert result is False + @pytest.mark.db_test + def test_is_logged_in_with_all_admins(self, auth_manager_with_appbuilder, app): + 
auth_manager_with_appbuilder.appbuilder.app.config["SIMPLE_AUTH_MANAGER_ALL_ADMINS"] = True + with app.test_request_context(): + result = auth_manager_with_appbuilder.is_logged_in() + assert result + @patch("airflow.auth.managers.simple.simple_auth_manager.url_for") def test_get_url_login(self, mock_url_for, auth_manager): auth_manager.get_url_login() @@ -97,15 +104,27 @@ def test_get_url_logout(self, mock_url_for, auth_manager): @pytest.mark.db_test @patch.object(SimpleAuthManager, "is_logged_in") - def test_get_user(self, mock_is_logged_in, auth_manager, app, test_user): + def test_get_user(self, mock_is_logged_in, auth_manager_with_appbuilder, app, test_user): mock_is_logged_in.return_value = True with app.test_request_context(): session["user"] = test_user - result = auth_manager.get_user() + result = auth_manager_with_appbuilder.get_user() assert result == test_user + @pytest.mark.db_test + @patch.object(SimpleAuthManager, "is_logged_in") + def test_get_user_with_all_admins(self, mock_is_logged_in, auth_manager_with_appbuilder, app): + mock_is_logged_in.return_value = True + + auth_manager_with_appbuilder.appbuilder.app.config["SIMPLE_AUTH_MANAGER_ALL_ADMINS"] = True + with app.test_request_context(): + result = auth_manager_with_appbuilder.get_user() + + assert result.username == "anonymous" + assert result.role == "admin" + @patch.object(SimpleAuthManager, "is_logged_in") def test_get_user_return_none_when_not_logged_in(self, mock_is_logged_in, auth_manager): mock_is_logged_in.return_value = False @@ -138,13 +157,13 @@ def test_get_user_return_none_when_not_logged_in(self, mock_is_logged_in, auth_m ], ) def test_is_authorized_methods( - self, mock_is_logged_in, auth_manager, app, api, is_logged_in, role, method, result + self, mock_is_logged_in, auth_manager_with_appbuilder, app, api, is_logged_in, role, method, result ): mock_is_logged_in.return_value = is_logged_in with app.test_request_context(): session["user"] = SimpleAuthManagerUser(username="test", role=role) - assert getattr(auth_manager, api)(method=method) is result + assert getattr(auth_manager_with_appbuilder, api)(method=method) is result @pytest.mark.db_test @patch.object(SimpleAuthManager, "is_logged_in") @@ -172,13 +191,13 @@ def test_is_authorized_methods( ], ) def test_is_authorized_view_methods( - self, mock_is_logged_in, auth_manager, app, api, kwargs, is_logged_in, role, result + self, mock_is_logged_in, auth_manager_with_appbuilder, app, api, kwargs, is_logged_in, role, result ): mock_is_logged_in.return_value = is_logged_in with app.test_request_context(): session["user"] = SimpleAuthManagerUser(username="test", role=role) - assert getattr(auth_manager, api)(**kwargs) is result + assert getattr(auth_manager_with_appbuilder, api)(**kwargs) is result @pytest.mark.db_test @patch.object(SimpleAuthManager, "is_logged_in") @@ -202,13 +221,13 @@ def test_is_authorized_view_methods( ], ) def test_is_authorized_methods_op_role_required( - self, mock_is_logged_in, auth_manager, app, api, role, method, result + self, mock_is_logged_in, auth_manager_with_appbuilder, app, api, role, method, result ): mock_is_logged_in.return_value = True with app.test_request_context(): session["user"] = SimpleAuthManagerUser(username="test", role=role) - assert getattr(auth_manager, api)(method=method) is result + assert getattr(auth_manager_with_appbuilder, api)(method=method) is result @pytest.mark.db_test @patch.object(SimpleAuthManager, "is_logged_in") @@ -227,13 +246,13 @@ def test_is_authorized_methods_op_role_required( ], ) def 
test_is_authorized_methods_user_role_required( - self, mock_is_logged_in, auth_manager, app, api, role, method, result + self, mock_is_logged_in, auth_manager_with_appbuilder, app, api, role, method, result ): mock_is_logged_in.return_value = True with app.test_request_context(): session["user"] = SimpleAuthManagerUser(username="test", role=role) - assert getattr(auth_manager, api)(method=method) is result + assert getattr(auth_manager_with_appbuilder, api)(method=method) is result @pytest.mark.db_test @patch.object(SimpleAuthManager, "is_logged_in") @@ -252,13 +271,13 @@ def test_is_authorized_methods_user_role_required( ], ) def test_is_authorized_methods_viewer_role_required_for_get( - self, mock_is_logged_in, auth_manager, app, api, role, method, result + self, mock_is_logged_in, auth_manager_with_appbuilder, app, api, role, method, result ): mock_is_logged_in.return_value = True with app.test_request_context(): session["user"] = SimpleAuthManagerUser(username="test", role=role) - assert getattr(auth_manager, api)(method=method) is result + assert getattr(auth_manager_with_appbuilder, api)(method=method) is result @pytest.mark.db_test @patch( diff --git a/tests/callbacks/test_callback_requests.py b/tests/callbacks/test_callback_requests.py index 6d900c8bd35716..5992ee6fbbf709 100644 --- a/tests/callbacks/test_callback_requests.py +++ b/tests/callbacks/test_callback_requests.py @@ -23,7 +23,6 @@ from airflow.callbacks.callback_requests import ( CallbackRequest, DagCallbackRequest, - SlaCallbackRequest, TaskCallbackRequest, ) from airflow.models.dag import DAG @@ -55,14 +54,6 @@ class TestCallbackRequest: ), DagCallbackRequest, ), - ( - SlaCallbackRequest( - full_filepath="filepath", - dag_id="fake_dag", - processor_subdir="/test_dir", - ), - SlaCallbackRequest, - ), ], ) def test_from_json(self, input, request_class): diff --git a/tests/dag_processing/test_job_runner.py b/tests/dag_processing/test_job_runner.py index 8112b7222a6973..1d3fefdf12d5f9 100644 --- a/tests/dag_processing/test_job_runner.py +++ b/tests/dag_processing/test_job_runner.py @@ -39,7 +39,7 @@ import time_machine from sqlalchemy import func -from airflow.callbacks.callback_requests import CallbackRequest, DagCallbackRequest, SlaCallbackRequest +from airflow.callbacks.callback_requests import CallbackRequest, DagCallbackRequest from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG from airflow.configuration import conf from airflow.dag_processing.manager import ( @@ -1179,16 +1179,10 @@ def test_fetch_callbacks_from_database(self, tmp_path): processor_subdir=os.fspath(tmp_path), run_id="456", ) - callback3 = SlaCallbackRequest( - dag_id="test_start_date_scheduling", - full_filepath=str(dag_filepath), - processor_subdir=os.fspath(tmp_path), - ) with create_session() as session: session.add(DbCallbackRequest(callback=callback1, priority_weight=11)) session.add(DbCallbackRequest(callback=callback2, priority_weight=10)) - session.add(DbCallbackRequest(callback=callback3, priority_weight=9)) child_pipe, parent_pipe = multiprocessing.Pipe() manager = DagProcessorJobRunner( @@ -1371,16 +1365,6 @@ def test_callback_queue(self, tmp_path): processor_subdir=tmp_path, msg=None, ) - dag1_sla1 = SlaCallbackRequest( - full_filepath="/green_eggs/ham/file1.py", - dag_id="dag1", - processor_subdir=tmp_path, - ) - dag1_sla2 = SlaCallbackRequest( - full_filepath="/green_eggs/ham/file1.py", - dag_id="dag1", - processor_subdir=tmp_path, - ) dag2_req1 = DagCallbackRequest( 
full_filepath="/green_eggs/ham/file2.py", @@ -1391,15 +1375,8 @@ def test_callback_queue(self, tmp_path): msg=None, ) - dag3_sla1 = SlaCallbackRequest( - full_filepath="/green_eggs/ham/file3.py", - dag_id="dag3", - processor_subdir=tmp_path, - ) - # when manager.processor._add_callback_to_queue(dag1_req1) - manager.processor._add_callback_to_queue(dag1_sla1) manager.processor._add_callback_to_queue(dag2_req1) # then - requests should be in manager's queue, with dag2 ahead of dag1 (because it was added last) @@ -1408,18 +1385,10 @@ def test_callback_queue(self, tmp_path): dag1_req1.full_filepath, dag2_req1.full_filepath, } - assert manager.processor._callback_to_execute[dag1_req1.full_filepath] == [dag1_req1, dag1_sla1] assert manager.processor._callback_to_execute[dag2_req1.full_filepath] == [dag2_req1] - # when - manager.processor._add_callback_to_queue(dag1_sla2) - manager.processor._add_callback_to_queue(dag3_sla1) - - # then - since sla2 == sla1, should not have brought dag1 to the fore, and an SLA on dag3 doesn't # update the queue, although the callback is registered assert manager.processor._file_path_queue == deque([dag2_req1.full_filepath, dag1_req1.full_filepath]) - assert manager.processor._callback_to_execute[dag1_req1.full_filepath] == [dag1_req1, dag1_sla1] - assert manager.processor._callback_to_execute[dag3_sla1.full_filepath] == [dag3_sla1] # when manager.processor._add_callback_to_queue(dag1_req2) @@ -1428,7 +1397,6 @@ def test_callback_queue(self, tmp_path): assert manager.processor._file_path_queue == deque([dag1_req1.full_filepath, dag2_req1.full_filepath]) assert manager.processor._callback_to_execute[dag1_req1.full_filepath] == [ dag1_req1, - dag1_sla1, dag1_req2, ] diff --git a/tests/dag_processing/test_processor.py b/tests/dag_processing/test_processor.py index 2b250ae8c55ed1..d7b2b2116653e6 100644 --- a/tests/dag_processing/test_processor.py +++ b/tests/dag_processing/test_processor.py @@ -32,10 +32,9 @@ from airflow.configuration import TEST_DAGS_FOLDER, conf from airflow.dag_processing.manager import DagFileProcessorAgent from airflow.dag_processing.processor import DagFileProcessor, DagFileProcessorProcess -from airflow.models import DagBag, DagModel, SlaMiss, TaskInstance +from airflow.models import DagBag, DagModel, TaskInstance from airflow.models.serialized_dag import SerializedDagModel from airflow.models.taskinstance import SimpleTaskInstance -from airflow.operators.empty import EmptyOperator from airflow.utils import timezone from airflow.utils.session import create_session from airflow.utils.state import State @@ -50,7 +49,6 @@ clear_db_pools, clear_db_runs, clear_db_serialized_dags, - clear_db_sla_miss, ) from tests.test_utils.mock_executor import MockExecutor @@ -89,7 +87,6 @@ def clean_db(): clear_db_runs() clear_db_pools() clear_db_dags() - clear_db_sla_miss() clear_db_import_errors() clear_db_jobs() clear_db_serialized_dags() @@ -116,395 +113,6 @@ def _process_file(self, file_path, dag_directory, session): dag_file_processor.process_file(file_path, [], False) - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_callback(self, mock_get_dagbag, create_dummy_dag, get_test_dag): - """ - Test that the dag file processor calls the sla miss callback - """ - session = settings.Session() - sla_callback = MagicMock() - - # Create dag with a start of 1 day ago, but a sla of 0, so we'll already have a sla_miss on the books. 
- test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - dag, task = create_dummy_dag( - dag_id="test_sla_miss", - task_id="dummy", - sla_miss_callback=sla_callback, - default_args={"start_date": test_start_date, "sla": datetime.timedelta()}, - ) - - session.merge( - TaskInstance( - task=task, - run_id=test_run_id, - state=State.SUCCESS, - ) - ) - session.merge(SlaMiss(task_id="dummy", dag_id="test_sla_miss", execution_date=test_start_date)) - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - session.commit() - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - - assert sla_callback.called - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_callback_invalid_sla(self, mock_get_dagbag, create_dummy_dag): - """ - Test that the dag file processor does not call the sla miss callback when - given an invalid sla - """ - session = settings.Session() - - sla_callback = MagicMock() - - # Create dag with a start of 1 day ago, but an sla of 0 - # so we'll already have an sla_miss on the books. - # Pass anything besides a timedelta object to the sla argument. - test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - dag, task = create_dummy_dag( - dag_id="test_sla_miss", - task_id="dummy", - sla_miss_callback=sla_callback, - default_args={"start_date": test_start_date, "sla": None}, - ) - - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - session.merge(SlaMiss(task_id="dummy", dag_id="test_sla_miss", execution_date=test_start_date)) - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - sla_callback.assert_not_called() - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_callback_sent_notification(self, mock_get_dagbag, create_dummy_dag): - """ - Test that the dag file processor does not call the sla_miss_callback when a - notification has already been sent - """ - session = settings.Session() - - # Mock the callback function so we can verify that it was not called - sla_callback = MagicMock() - - # Create dag with a start of 2 days ago, but an sla of 1 day - # ago so we'll already have an sla_miss on the books - test_start_date = timezone.utcnow() - datetime.timedelta(days=2) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - dag, task = create_dummy_dag( - dag_id="test_sla_miss", - task_id="dummy", - sla_miss_callback=sla_callback, - default_args={"start_date": test_start_date, "sla": datetime.timedelta(days=1)}, - ) - - # Create a TaskInstance for two days ago - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - - # Create an SlaMiss where notification was sent, but email was not - session.merge( - SlaMiss( - task_id="dummy", - dag_id="test_sla_miss", - execution_date=test_start_date, - email_sent=False, - notification_sent=True, - ) - ) - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - # Now 
call manage_slas and see if the sla_miss callback gets called - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - - sla_callback.assert_not_called() - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.Stats.incr") - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_doesnot_raise_integrity_error( - self, mock_get_dagbag, mock_stats_incr, dag_maker - ): - """ - Test that the dag file processor does not try to insert already existing item into the database - """ - session = settings.Session() - - # Create dag with a start of 2 days ago, but an sla of 1 day - # ago so we'll already have an sla_miss on the books - test_start_date = timezone.utcnow() - datetime.timedelta(days=2) - with dag_maker( - dag_id="test_sla_miss", - default_args={"start_date": test_start_date, "sla": datetime.timedelta(days=1)}, - ) as dag: - task = EmptyOperator(task_id="dummy") - - dr = dag_maker.create_dagrun(execution_date=test_start_date, state=State.SUCCESS) - - # Create a TaskInstance for two days ago - ti = TaskInstance(task=task, run_id=dr.run_id, state=State.SUCCESS) - session.merge(ti) - session.flush() - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - sla_miss_count = ( - session.query(SlaMiss) - .filter( - SlaMiss.dag_id == dag.dag_id, - SlaMiss.task_id == task.task_id, - ) - .count() - ) - assert sla_miss_count == 1 - mock_stats_incr.assert_called_with("sla_missed", tags={"dag_id": "test_sla_miss", "task_id": "dummy"}) - # Now call manage_slas and see that it runs without errors - # because of existing SlaMiss above. - # Since this is run often, it's possible that it runs before another - # ti is successful thereby trying to insert a duplicate record. 
- DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.Stats.incr") - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_continue_checking_the_task_instances_after_recording_missing_sla( - self, mock_get_dagbag, mock_stats_incr, dag_maker - ): - """ - Test that the dag file processor continue checking subsequent task instances - even if the preceding task instance misses the sla ahead - """ - session = settings.Session() - - # Create a dag with a start of 3 days ago and sla of 1 day, - # so we have 2 missing slas - now = timezone.utcnow() - test_start_date = now - datetime.timedelta(days=3) - # test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - with dag_maker( - dag_id="test_sla_miss", - default_args={"start_date": test_start_date, "sla": datetime.timedelta(days=1)}, - ) as dag: - task = EmptyOperator(task_id="dummy") - - dr = dag_maker.create_dagrun(execution_date=test_start_date, state=State.SUCCESS) - - session.merge(TaskInstance(task=task, run_id=dr.run_id, state="success")) - session.merge( - SlaMiss(task_id=task.task_id, dag_id=dag.dag_id, execution_date=now - datetime.timedelta(days=2)) - ) - session.flush() - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - sla_miss_count = ( - session.query(SlaMiss) - .filter( - SlaMiss.dag_id == dag.dag_id, - SlaMiss.task_id == task.task_id, - ) - .count() - ) - assert sla_miss_count == 2 - mock_stats_incr.assert_called_with("sla_missed", tags={"dag_id": "test_sla_miss", "task_id": "dummy"}) - - @pytest.mark.skip_if_database_isolation_mode - @patch.object(DagFileProcessor, "logger") - @mock.patch("airflow.dag_processing.processor.Stats.incr") - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_callback_exception( - self, - mock_get_dagbag, - mock_stats_incr, - mock_get_log, - create_dummy_dag, - ): - """ - Test that the dag file processor gracefully logs an exception if there is a problem - calling the sla_miss_callback - """ - session = settings.Session() - - sla_callback = MagicMock( - __name__="function_name", side_effect=RuntimeError("Could not call function") - ) - - test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - - for i, callback in enumerate([[sla_callback], sla_callback]): - dag, task = create_dummy_dag( - dag_id=f"test_sla_miss_{i}", - task_id="dummy", - sla_miss_callback=callback, - default_args={"start_date": test_start_date, "sla": datetime.timedelta(hours=1)}, - ) - mock_stats_incr.reset_mock() - - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - - # Create an SlaMiss where notification was sent, but email was not - session.merge( - SlaMiss(task_id="dummy", dag_id=f"test_sla_miss_{i}", execution_date=test_start_date) - ) - - # Now call manage_slas and see if the sla_miss callback gets called - mock_log = mock.Mock() - mock_get_log.return_value = mock_log - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - 
assert sla_callback.called - mock_log.exception.assert_called_once_with( - "Could not call sla_miss_callback(%s) for DAG %s", - sla_callback.__name__, - f"test_sla_miss_{i}", - ) - mock_stats_incr.assert_called_once_with( - "sla_callback_notification_failure", - tags={"dag_id": f"test_sla_miss_{i}", "func_name": sla_callback.__name__}, - ) - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.send_email") - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_only_collect_emails_from_sla_missed_tasks( - self, mock_get_dagbag, mock_send_email, create_dummy_dag - ): - session = settings.Session() - - test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - email1 = "test1@test.com" - dag, task = create_dummy_dag( - dag_id="test_sla_miss", - task_id="sla_missed", - email=email1, - default_args={"start_date": test_start_date, "sla": datetime.timedelta(hours=1)}, - ) - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - - email2 = "test2@test.com" - EmptyOperator(task_id="sla_not_missed", dag=dag, owner="airflow", email=email2) - - session.merge(SlaMiss(task_id="sla_missed", dag_id="test_sla_miss", execution_date=test_start_date)) - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - - assert len(mock_send_email.call_args_list) == 1 - - send_email_to = mock_send_email.call_args_list[0][0][0] - assert email1 in send_email_to - assert email2 not in send_email_to - - @pytest.mark.skip_if_database_isolation_mode - @patch.object(DagFileProcessor, "logger") - @mock.patch("airflow.dag_processing.processor.Stats.incr") - @mock.patch("airflow.utils.email.send_email") - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_email_exception( - self, - mock_get_dagbag, - mock_send_email, - mock_stats_incr, - mock_get_log, - create_dummy_dag, - ): - """ - Test that the dag file processor gracefully logs an exception if there is a problem - sending an email - """ - session = settings.Session() - dag_id = "test_sla_miss" - task_id = "test_ti" - email = "test@test.com" - - # Mock the callback function so we can verify that it was not called - mock_send_email.side_effect = RuntimeError("Could not send an email") - - test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - dag, task = create_dummy_dag( - dag_id=dag_id, - task_id=task_id, - email=email, - default_args={"start_date": test_start_date, "sla": datetime.timedelta(hours=1)}, - ) - mock_stats_incr.reset_mock() - - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - - # Create an SlaMiss where notification was sent, but email was not - session.merge(SlaMiss(task_id=task_id, dag_id=dag_id, execution_date=test_start_date)) - - mock_log = mock.Mock() - mock_get_log.return_value = mock_log - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id=dag_id, session=session) - mock_log.exception.assert_called_once_with( - "Could not send SLA Miss email notification for DAG %s", dag_id - ) - 
mock_stats_incr.assert_called_once_with("sla_email_notification_failure", tags={"dag_id": dag_id}) - - @pytest.mark.skip_if_database_isolation_mode - @mock.patch("airflow.dag_processing.processor.DagFileProcessor._get_dagbag") - def test_dag_file_processor_sla_miss_deleted_task(self, mock_get_dagbag, create_dummy_dag): - """ - Test that the dag file processor will not crash when trying to send - sla miss notification for a deleted task - """ - session = settings.Session() - - test_start_date = timezone.utcnow() - datetime.timedelta(days=1) - test_run_id = DagRunType.SCHEDULED.generate_run_id(test_start_date) - dag, task = create_dummy_dag( - dag_id="test_sla_miss", - task_id="dummy", - email="test@test.com", - default_args={"start_date": test_start_date, "sla": datetime.timedelta(hours=1)}, - ) - - session.merge(TaskInstance(task=task, run_id=test_run_id, state=State.SUCCESS)) - - # Create an SlaMiss where notification was sent, but email was not - session.merge( - SlaMiss(task_id="dummy_deleted", dag_id="test_sla_miss", execution_date=test_start_date) - ) - - mock_dagbag = mock.Mock() - mock_dagbag.get_dag.return_value = dag - mock_get_dagbag.return_value = mock_dagbag - - DagFileProcessor.manage_slas(dag_folder=dag.fileloc, dag_id="test_sla_miss", session=session) - @pytest.mark.skip_if_database_isolation_mode # Test is broken in db isolation mode @patch.object(TaskInstance, "handle_failure") def test_execute_on_failure_callbacks(self, mock_ti_handle_failure): diff --git a/tests/datasets/test_manager.py b/tests/datasets/test_manager.py index 1e7b4fda40ceee..9b8b0c180d48e6 100644 --- a/tests/datasets/test_manager.py +++ b/tests/datasets/test_manager.py @@ -119,9 +119,10 @@ def test_register_dataset_change(self, session, dag_maker, mock_task_instance): session.add(dsm) dsm.consuming_dags = [DagScheduleDatasetReference(dag_id=dag.dag_id) for dag in (dag1, dag2)] session.execute(delete(DatasetDagRunQueue)) - session.commit() + session.flush() dsem.register_dataset_change(task_instance=mock_task_instance, dataset=ds, session=session) + session.flush() # Ensure we've created a dataset assert session.query(DatasetEvent).filter_by(dataset_id=dsm.id).count() == 1 @@ -134,9 +135,10 @@ def test_register_dataset_change_no_downstreams(self, session, mock_task_instanc dsm = DatasetModel(uri="never_consumed") session.add(dsm) session.execute(delete(DatasetDagRunQueue)) - session.commit() + session.flush() dsem.register_dataset_change(task_instance=mock_task_instance, dataset=ds, session=session) + session.flush() # Ensure we've created a dataset assert session.query(DatasetEvent).filter_by(dataset_id=dsm.id).count() == 1 @@ -150,14 +152,15 @@ def test_register_dataset_change_notifies_dataset_listener(self, session, mock_t ds = Dataset(uri="test_dataset_uri_2") dag1 = DagModel(dag_id="dag3") - session.add_all([dag1]) + session.add(dag1) dsm = DatasetModel(uri="test_dataset_uri_2") session.add(dsm) dsm.consuming_dags = [DagScheduleDatasetReference(dag_id=dag1.dag_id)] - session.commit() + session.flush() dsem.register_dataset_change(task_instance=mock_task_instance, dataset=ds, session=session) + session.flush() # Ensure the listener was notified assert len(dataset_listener.changed) == 1 @@ -169,10 +172,11 @@ def test_create_datasets_notifies_dataset_listener(self, session): dataset_listener.clear() get_listener_manager().add_listener(dataset_listener) - dsm = DatasetModel(uri="test_dataset_uri_3") + ds = Dataset(uri="test_dataset_uri_3") - dsem.create_datasets([dsm], session) + dsms = 
dsem.create_datasets([ds], session=session) # Ensure the listener was notified assert len(dataset_listener.created) == 1 - assert dataset_listener.created[0].uri == dsm.uri + assert len(dsms) == 1 + assert dataset_listener.created[0].uri == ds.uri == dsms[0].uri diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index 372a6ae2d9dfe2..52e9dbdeb1a042 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -36,13 +36,13 @@ import airflow.example_dags from airflow import settings -from airflow.callbacks.callback_requests import DagCallbackRequest, SlaCallbackRequest, TaskCallbackRequest +from airflow.callbacks.callback_requests import DagCallbackRequest, TaskCallbackRequest from airflow.callbacks.database_callback_sink import DatabaseCallbackSink from airflow.callbacks.pipe_callback_sink import PipeCallbackSink from airflow.dag_processing.manager import DagFileProcessorAgent from airflow.datasets import Dataset from airflow.datasets.manager import DatasetManager -from airflow.exceptions import AirflowException +from airflow.exceptions import AirflowException, RemovedInAirflow3Warning from airflow.executors.base_executor import BaseExecutor from airflow.executors.executor_constants import MOCK_EXECUTOR from airflow.executors.executor_loader import ExecutorLoader @@ -2838,6 +2838,10 @@ def evaluate_dagrun( This is hackish: a dag run is created but its tasks are run by a backfill. """ + + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + if run_kwargs is None: run_kwargs = {} @@ -2898,40 +2902,49 @@ def test_dagrun_fail(self): """ DagRuns with one failed and one incomplete root task -> FAILED """ - self.evaluate_dagrun( - dag_id="test_dagrun_states_fail", - expected_task_states={ - "test_dagrun_fail": State.FAILED, - "test_dagrun_succeed": State.UPSTREAM_FAILED, - }, - dagrun_state=State.FAILED, - ) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + self.evaluate_dagrun( + dag_id="test_dagrun_states_fail", + expected_task_states={ + "test_dagrun_fail": State.FAILED, + "test_dagrun_succeed": State.UPSTREAM_FAILED, + }, + dagrun_state=State.FAILED, + ) def test_dagrun_success(self): """ DagRuns with one failed and one successful root task -> SUCCESS """ - self.evaluate_dagrun( - dag_id="test_dagrun_states_success", - expected_task_states={ - "test_dagrun_fail": State.FAILED, - "test_dagrun_succeed": State.SUCCESS, - }, - dagrun_state=State.SUCCESS, - ) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + self.evaluate_dagrun( + dag_id="test_dagrun_states_success", + expected_task_states={ + "test_dagrun_fail": State.FAILED, + "test_dagrun_succeed": State.SUCCESS, + }, + dagrun_state=State.SUCCESS, + ) def test_dagrun_root_fail(self): """ DagRuns with one successful and one failed root task -> FAILED """ - self.evaluate_dagrun( - dag_id="test_dagrun_states_root_fail", - expected_task_states={ - "test_dagrun_succeed": State.SUCCESS, - "test_dagrun_fail": State.FAILED, - }, - dagrun_state=State.FAILED, - ) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + self.evaluate_dagrun( + dag_id="test_dagrun_states_root_fail", + expected_task_states={ + "test_dagrun_succeed": 
State.SUCCESS, + "test_dagrun_fail": State.FAILED, + }, + dagrun_state=State.FAILED, + ) def test_dagrun_root_fail_unfinished(self): """ @@ -2952,9 +2965,12 @@ def test_dagrun_root_fail_unfinished(self): ) self.null_exec.mock_task_fail(dag_id, "test_dagrun_fail", dr.run_id) - for _ in _mock_executor(self.null_exec): - with pytest.raises(AirflowException): - dag.run(start_date=dr.execution_date, end_date=dr.execution_date) + # todo: AIP-78 remove this test along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + for _ in _mock_executor(self.null_exec): + with pytest.raises(AirflowException): + dag.run(start_date=dr.execution_date, end_date=dr.execution_date) # Mark the successful task as never having run since we want to see if the # dagrun will be in a running state despite having an unfinished task. @@ -2994,16 +3010,19 @@ def test_dagrun_deadlock_ignore_depends_on_past_advance_ex_date(self): if ignore_first_depends_on_past=True and the dagrun execution_date is after the start_date. """ - self.evaluate_dagrun( - dag_id="test_dagrun_states_deadlock", - expected_task_states={ - "test_depends_on_past": State.SUCCESS, - "test_depends_on_past_2": State.SUCCESS, - }, - dagrun_state=State.SUCCESS, - advance_execution_date=True, - run_kwargs=dict(ignore_first_depends_on_past=True), - ) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + self.evaluate_dagrun( + dag_id="test_dagrun_states_deadlock", + expected_task_states={ + "test_depends_on_past": State.SUCCESS, + "test_depends_on_past_2": State.SUCCESS, + }, + dagrun_state=State.SUCCESS, + advance_execution_date=True, + run_kwargs=dict(ignore_first_depends_on_past=True), + ) def test_dagrun_deadlock_ignore_depends_on_past(self): """ @@ -3012,15 +3031,18 @@ def test_dagrun_deadlock_ignore_depends_on_past(self): test_dagrun_deadlock_ignore_depends_on_past_advance_ex_date except that start_date == execution_date so depends_on_past is irrelevant). 
""" - self.evaluate_dagrun( - dag_id="test_dagrun_states_deadlock", - expected_task_states={ - "test_depends_on_past": State.SUCCESS, - "test_depends_on_past_2": State.SUCCESS, - }, - dagrun_state=State.SUCCESS, - run_kwargs=dict(ignore_first_depends_on_past=True), - ) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + self.evaluate_dagrun( + dag_id="test_dagrun_states_deadlock", + expected_task_states={ + "test_depends_on_past": State.SUCCESS, + "test_depends_on_past_2": State.SUCCESS, + }, + dagrun_state=State.SUCCESS, + run_kwargs=dict(ignore_first_depends_on_past=True), + ) @pytest.mark.parametrize( "configs", @@ -3965,82 +3987,6 @@ def test_adopt_or_reset_orphaned_tasks_only_fails_scheduler_jobs(self, caplog): assert old_task_job.state == State.RUNNING assert "Marked 1 SchedulerJob instances as failed" in caplog.messages - def test_send_sla_callbacks_to_processor_sla_disabled(self, dag_maker): - """Test SLA Callbacks are not sent when check_slas is False""" - dag_id = "test_send_sla_callbacks_to_processor_sla_disabled" - with dag_maker(dag_id=dag_id, schedule="@daily") as dag: - EmptyOperator(task_id="task1") - - with patch.object(settings, "CHECK_SLAS", False): - scheduler_job = Job() - self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) - scheduler_job.executor = MockExecutor() - self.job_runner._send_sla_callbacks_to_processor(dag) - scheduler_job.executor.callback_sink.send.assert_not_called() - - def test_send_sla_callbacks_to_processor_sla_no_task_slas(self, dag_maker): - """Test SLA Callbacks are not sent when no task SLAs are defined""" - dag_id = "test_send_sla_callbacks_to_processor_sla_no_task_slas" - with dag_maker(dag_id=dag_id, schedule="@daily") as dag: - EmptyOperator(task_id="task1") - - with patch.object(settings, "CHECK_SLAS", True): - scheduler_job = Job() - self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) - scheduler_job.executor = MockExecutor() - self.job_runner._send_sla_callbacks_to_processor(dag) - scheduler_job.executor.callback_sink.send.assert_not_called() - - @pytest.mark.parametrize( - "schedule", - [ - "@daily", - "0 10 * * *", - timedelta(hours=2), - ], - ) - def test_send_sla_callbacks_to_processor_sla_with_task_slas(self, schedule, dag_maker): - """Test SLA Callbacks are sent to the DAG Processor when SLAs are defined on tasks""" - dag_id = "test_send_sla_callbacks_to_processor_sla_with_task_slas" - with dag_maker( - dag_id=dag_id, - schedule=schedule, - processor_subdir=TEST_DAG_FOLDER, - ) as dag: - EmptyOperator(task_id="task1", sla=timedelta(seconds=60)) - - with patch.object(settings, "CHECK_SLAS", True): - scheduler_job = Job() - self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) - scheduler_job.executor = MockExecutor() - self.job_runner._send_sla_callbacks_to_processor(dag) - expected_callback = SlaCallbackRequest( - full_filepath=dag.fileloc, - dag_id=dag.dag_id, - processor_subdir=TEST_DAG_FOLDER, - ) - scheduler_job.executor.callback_sink.send.assert_called_once_with(expected_callback) - - @pytest.mark.parametrize( - "schedule", - [ - None, - [Dataset("foo")], - ], - ) - def test_send_sla_callbacks_to_processor_sla_dag_not_scheduled(self, schedule, dag_maker): - """Test SLA Callbacks are not sent when DAG isn't scheduled""" - dag_id = "test_send_sla_callbacks_to_processor_sla_no_task_slas" - with dag_maker(dag_id=dag_id, schedule=schedule) as dag: - 
EmptyOperator(task_id="task1", sla=timedelta(seconds=5)) - - with patch.object(settings, "CHECK_SLAS", True): - scheduler_job = Job() - self.job_runner = SchedulerJobRunner(job=scheduler_job, subdir=os.devnull) - scheduler_job.executor = MockExecutor() - self.job_runner._send_sla_callbacks_to_processor(dag) - scheduler_job.executor.callback_sink.send.assert_not_called() - @pytest.mark.parametrize( "schedule, number_running, excepted", [ @@ -4278,6 +4224,7 @@ def test_no_create_dag_runs_when_dag_disabled(self, session, dag_maker, disable, dataset=ds, session=session, ) + session.flush() assert session.scalars(dse_q).one().source_run_id == dr1.run_id assert session.scalars(ddrq_q).one_or_none() is None @@ -4291,6 +4238,7 @@ def test_no_create_dag_runs_when_dag_disabled(self, session, dag_maker, disable, dataset=ds, session=session, ) + session.flush() assert [e.source_run_id for e in session.scalars(dse_q)] == [dr1.run_id, dr2.run_id] assert session.scalars(ddrq_q).one().target_dag_id == "consumer" @@ -6014,7 +5962,9 @@ def test_execute_queries_count_with_harvested_dags(self, expected_query_count, d self.job_runner.processor_agent = mock_agent with assert_queries_count(expected_query_count, margin=15): - with mock.patch.object(DagRun, "next_dagruns_to_examine") as mock_dagruns: + with mock.patch.object( + DagRun, DagRun.get_running_dag_runs_to_examine.__name__ + ) as mock_dagruns: query = MagicMock() query.all.return_value = dagruns mock_dagruns.return_value = query diff --git a/tests/models/test_baseoperator.py b/tests/models/test_baseoperator.py index 2aa5b76b22c03e..3c5b7634d5a990 100644 --- a/tests/models/test_baseoperator.py +++ b/tests/models/test_baseoperator.py @@ -304,51 +304,6 @@ def test_render_template_with_native_envs(self, content, context, expected_outpu result = task.render_template(content, context) assert result == expected_output - def test_mapped_dag_slas_disabled_classic(self): - class MyOp(BaseOperator): - def __init__(self, x, **kwargs): - self.x = x - super().__init__(**kwargs) - - def execute(self, context): - print(self.x) - - with DAG( - dag_id="test-dag", - schedule=None, - start_date=DEFAULT_DATE, - default_args={"sla": timedelta(minutes=30)}, - ) as dag: - - @dag.task - def get_values(): - return [0, 1, 2] - - task1 = get_values() - with pytest.raises(AirflowException, match="SLAs are unsupported with mapped tasks"): - MyOp.partial(task_id="hi").expand(x=task1) - - def test_mapped_dag_slas_disabled_taskflow(self): - with DAG( - dag_id="test-dag", - schedule=None, - start_date=DEFAULT_DATE, - default_args={"sla": timedelta(minutes=30)}, - ) as dag: - - @dag.task - def get_values(): - return [0, 1, 2] - - task1 = get_values() - - @dag.task - def print_val(x): - print(x) - - with pytest.raises(AirflowException, match="SLAs are unsupported with mapped tasks"): - print_val.expand(x=task1) - @pytest.mark.db_test def test_render_template_fields(self): """Verify if operator attributes are correctly templated.""" diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index 90d956caeb7ddc..df4a8927688164 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -45,6 +45,7 @@ AirflowException, DuplicateTaskIdFound, ParamValidationError, + RemovedInAirflow3Warning, UnknownExecutorException, ) from airflow.executors import executor_loader @@ -2733,14 +2734,17 @@ def test_dataset_expression(self, session: Session) -> None: @mock.patch("airflow.models.dag.run_job") def test_dag_executors(self, run_job_mock): - dag = DAG(dag_id="test", schedule=None) - 
reload(executor_loader) - with conf_vars({("core", "executor"): "SequentialExecutor"}): - dag.run() - assert isinstance(run_job_mock.call_args_list[0].kwargs["job"].executor, SequentialExecutor) - - dag.run(local=True) - assert isinstance(run_job_mock.call_args_list[1].kwargs["job"].executor, LocalExecutor) + # todo: AIP-78 remove along with DAG.run() + # this only tests the backfill job runner, not the scheduler + with pytest.warns(RemovedInAirflow3Warning): + dag = DAG(dag_id="test", schedule=None) + reload(executor_loader) + with conf_vars({("core", "executor"): "SequentialExecutor"}): + dag.run() + assert isinstance(run_job_mock.call_args_list[0].kwargs["job"].executor, SequentialExecutor) + + dag.run(local=True) + assert isinstance(run_job_mock.call_args_list[1].kwargs["job"].executor, LocalExecutor) class TestQueries: diff --git a/tests/models/test_dagrun.py b/tests/models/test_dagrun.py index f72f3b2b794fc2..d2f70ce69314b8 100644 --- a/tests/models/test_dagrun.py +++ b/tests/models/test_dagrun.py @@ -931,14 +931,18 @@ def test_next_dagruns_to_examine_only_unpaused(self, session, state): **triggered_by_kwargs, ) - runs = DagRun.next_dagruns_to_examine(state, session).all() + if state == DagRunState.RUNNING: + func = DagRun.get_running_dag_runs_to_examine + else: + func = DagRun.get_queued_dag_runs_to_set_running + runs = func(session).all() assert runs == [dr] orm_dag.is_paused = True session.flush() - runs = DagRun.next_dagruns_to_examine(state, session).all() + runs = func(session).all() assert runs == [] @mock.patch.object(Stats, "timing") diff --git a/tests/models/test_taskinstance.py b/tests/models/test_taskinstance.py index 773c68915cefac..d2922db267805d 100644 --- a/tests/models/test_taskinstance.py +++ b/tests/models/test_taskinstance.py @@ -2325,7 +2325,9 @@ def test_outlet_datasets(self, create_task_instance): ddrq_timestamps = ( session.query(DatasetDagRunQueue.created_at).filter_by(dataset_id=event.dataset.id).all() ) - assert all([event.timestamp < ddrq_timestamp for (ddrq_timestamp,) in ddrq_timestamps]) + assert all( + event.timestamp < ddrq_timestamp for (ddrq_timestamp,) in ddrq_timestamps + ), f"Some items in {[str(t) for t in ddrq_timestamps]} are earlier than {event.timestamp}" @pytest.mark.skip_if_database_isolation_mode # Does not work in db isolation mode def test_outlet_datasets_failed(self, create_task_instance): diff --git a/tests/providers/amazon/aws/auth_manager/cli/test_definition.py b/tests/providers/amazon/aws/auth_manager/cli/test_definition.py index 5866aa594f8ec2..079df886f60395 100644 --- a/tests/providers/amazon/aws/auth_manager/cli/test_definition.py +++ b/tests/providers/amazon/aws/auth_manager/cli/test_definition.py @@ -21,4 +21,4 @@ class TestAwsCliDefinition: def test_aws_auth_manager_cli_commands(self): - assert len(AWS_AUTH_MANAGER_COMMANDS) == 3 + assert len(AWS_AUTH_MANAGER_COMMANDS) == 2 diff --git a/tests/providers/amazon/aws/auth_manager/cli/test_idc_commands.py b/tests/providers/amazon/aws/auth_manager/cli/test_idc_commands.py deleted file mode 100644 index 394704474f1bbe..00000000000000 --- a/tests/providers/amazon/aws/auth_manager/cli/test_idc_commands.py +++ /dev/null @@ -1,140 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -import importlib -from unittest.mock import Mock, patch - -import pytest - -from airflow.cli import cli_parser -from airflow.providers.amazon.aws.auth_manager.cli.idc_commands import init_idc -from tests.test_utils.compat import AIRFLOW_V_2_8_PLUS -from tests.test_utils.config import conf_vars - -mock_boto3 = Mock() - -pytestmark = [ - pytest.mark.skipif(not AIRFLOW_V_2_8_PLUS, reason="Test requires Airflow 2.8+"), - pytest.mark.skip_if_database_isolation_mode, -] - - -@pytest.mark.db_test -class TestIdcCommands: - def setup_method(self): - mock_boto3.reset_mock() - - @classmethod - def setup_class(cls): - with conf_vars( - { - ( - "core", - "auth_manager", - ): "airflow.providers.amazon.aws.auth_manager.aws_auth_manager.AwsAuthManager" - } - ): - importlib.reload(cli_parser) - cls.arg_parser = cli_parser.get_parser() - - @pytest.mark.parametrize( - "dry_run, verbose", - [ - (False, False), - (True, True), - ], - ) - @patch("airflow.providers.amazon.aws.auth_manager.cli.idc_commands._get_client") - def test_init_idc_with_no_existing_resources(self, mock_get_client, dry_run, verbose): - mock_get_client.return_value = mock_boto3 - - instance_name = "test-instance" - instance_arn = "test-instance-arn" - application_name = "test-application" - application_arn = "test-application-arn" - - paginator = Mock() - paginator.paginate.return_value = [] - - mock_boto3.list_instances.return_value = {"Instances": []} - mock_boto3.create_instance.return_value = {"InstanceArn": instance_arn} - mock_boto3.get_paginator.return_value = paginator - mock_boto3.create_application.return_value = {"ApplicationArn": application_arn} - - with conf_vars({("database", "check_migrations"): "False"}): - params = [ - "aws-auth-manager", - "init-identity-center", - "--instance-name", - instance_name, - "--application-name", - application_name, - ] - if dry_run: - params.append("--dry-run") - if verbose: - params.append("--verbose") - init_idc(self.arg_parser.parse_args(params)) - - mock_boto3.list_instances.assert_called_once_with() - if not dry_run: - mock_boto3.create_instance.assert_called_once_with(Name=instance_name) - mock_boto3.create_application.assert_called_once() - - @pytest.mark.parametrize( - "dry_run, verbose", - [ - (False, False), - (True, True), - ], - ) - @patch("airflow.providers.amazon.aws.auth_manager.cli.idc_commands._get_client") - def test_init_idc_with_existing_resources(self, mock_get_client, dry_run, verbose): - mock_get_client.return_value = mock_boto3 - - instance_name = "test-instance" - instance_arn = "test-instance-arn" - application_name = "test-application" - application_arn = "test-application-arn" - - paginator = Mock() - paginator.paginate.return_value = [ - {"Applications": [{"Name": application_name, "ApplicationArn": application_arn}]} - ] - - mock_boto3.list_instances.return_value = {"Instances": [{"InstanceArn": instance_arn}]} - mock_boto3.get_paginator.return_value = 
paginator - - with conf_vars({("database", "check_migrations"): "False"}): - params = [ - "aws-auth-manager", - "init-identity-center", - "--instance-name", - instance_name, - "--application-name", - application_name, - ] - if dry_run: - params.append("--dry-run") - if verbose: - params.append("--verbose") - init_idc(self.arg_parser.parse_args(params)) - - mock_boto3.list_instances.assert_called_once_with() - mock_boto3.create_instance.assert_not_called() - mock_boto3.create_application.assert_not_called() diff --git a/tests/providers/amazon/aws/auth_manager/views/test_auth.py b/tests/providers/amazon/aws/auth_manager/views/test_auth.py index 435dd8d2c32fec..05d2fb84b51cf1 100644 --- a/tests/providers/amazon/aws/auth_manager/views/test_auth.py +++ b/tests/providers/amazon/aws/auth_manager/views/test_auth.py @@ -69,7 +69,7 @@ def aws_app(): ) as mock_is_policy_store_schema_up_to_date: mock_is_policy_store_schema_up_to_date.return_value = True mock_parser.parse_remote.return_value = SAML_METADATA_PARSED - return application.create_app(testing=True) + return application.create_app(testing=True, config={"WTF_CSRF_ENABLED": False}) @pytest.mark.db_test @@ -82,7 +82,7 @@ def test_login_redirect_to_identity_center(self, aws_app): def test_logout_redirect_to_identity_center(self, aws_app): with aws_app.test_client() as client: - response = client.get("/logout") + response = client.post("/logout") assert response.status_code == 302 assert response.location.startswith("https://portal.sso.us-east-1.amazonaws.com/saml/logout/") diff --git a/tests/providers/amazon/aws/hooks/test_redshift_data.py b/tests/providers/amazon/aws/hooks/test_redshift_data.py index a0952e5ba72597..d5480864498120 100644 --- a/tests/providers/amazon/aws/hooks/test_redshift_data.py +++ b/tests/providers/amazon/aws/hooks/test_redshift_data.py @@ -19,6 +19,7 @@ import logging from unittest import mock +from uuid import uuid4 import pytest @@ -63,15 +64,18 @@ def test_execute_without_waiting(self, mock_conn): mock_conn.describe_statement.assert_not_called() @pytest.mark.parametrize( - "cluster_identifier, workgroup_name", + "cluster_identifier, workgroup_name, session_id", [ - (None, None), - ("some_cluster", "some_workgroup"), + (None, None, None), + ("some_cluster", "some_workgroup", None), + (None, "some_workgroup", None), + ("some_cluster", None, None), + (None, None, "some_session_id"), ], ) @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") - def test_execute_requires_either_cluster_identifier_or_workgroup_name( - self, mock_conn, cluster_identifier, workgroup_name + def test_execute_requires_one_of_cluster_identifier_or_workgroup_name_or_session_id( + self, mock_conn, cluster_identifier, workgroup_name, session_id ): mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} cluster_identifier = "cluster_identifier" @@ -84,6 +88,51 @@ def test_execute_requires_either_cluster_identifier_or_workgroup_name( workgroup_name=workgroup_name, sql=SQL, wait_for_completion=False, + session_id=session_id, + ) + + @pytest.mark.parametrize( + "cluster_identifier, workgroup_name, session_id", + [ + (None, None, None), + ("some_cluster", "some_workgroup", None), + (None, "some_workgroup", None), + ("some_cluster", None, None), + (None, None, "some_session_id"), + ], + ) + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") + def test_execute_session_keep_alive_seconds_valid( + self, mock_conn, cluster_identifier, workgroup_name, session_id + ): + 
mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} + cluster_identifier = "cluster_identifier" + workgroup_name = "workgroup_name" + hook = RedshiftDataHook() + with pytest.raises(ValueError): + hook.execute_query( + database=DATABASE, + cluster_identifier=cluster_identifier, + workgroup_name=workgroup_name, + sql=SQL, + wait_for_completion=False, + session_id=session_id, + ) + + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") + def test_execute_session_id_valid(self, mock_conn): + mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} + cluster_identifier = "cluster_identifier" + workgroup_name = "workgroup_name" + hook = RedshiftDataHook() + with pytest.raises(ValueError): + hook.execute_query( + database=DATABASE, + cluster_identifier=cluster_identifier, + workgroup_name=workgroup_name, + sql=SQL, + wait_for_completion=False, + session_id="not_a_uuid", ) @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") @@ -156,6 +205,74 @@ def test_execute_with_all_parameters_workgroup_name(self, mock_conn): Id=STATEMENT_ID, ) + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") + def test_execute_with_new_session(self, mock_conn): + cluster_identifier = "cluster_identifier" + db_user = "db_user" + secret_arn = "secret_arn" + statement_name = "statement_name" + parameters = [{"name": "id", "value": "1"}] + mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID, "SessionId": "session_id"} + mock_conn.describe_statement.return_value = {"Status": "FINISHED"} + + hook = RedshiftDataHook() + output = hook.execute_query( + sql=SQL, + database=DATABASE, + cluster_identifier=cluster_identifier, + db_user=db_user, + secret_arn=secret_arn, + statement_name=statement_name, + parameters=parameters, + session_keep_alive_seconds=123, + ) + assert output.statement_id == STATEMENT_ID + assert output.session_id == "session_id" + + mock_conn.execute_statement.assert_called_once_with( + Database=DATABASE, + Sql=SQL, + ClusterIdentifier=cluster_identifier, + DbUser=db_user, + SecretArn=secret_arn, + StatementName=statement_name, + Parameters=parameters, + WithEvent=False, + SessionKeepAliveSeconds=123, + ) + mock_conn.describe_statement.assert_called_once_with( + Id=STATEMENT_ID, + ) + + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") + def test_execute_reuse_session(self, mock_conn): + statement_name = "statement_name" + parameters = [{"name": "id", "value": "1"}] + mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID, "SessionId": "session_id"} + mock_conn.describe_statement.return_value = {"Status": "FINISHED"} + hook = RedshiftDataHook() + session_id = str(uuid4()) + output = hook.execute_query( + database=None, + sql=SQL, + statement_name=statement_name, + parameters=parameters, + session_id=session_id, + ) + assert output.statement_id == STATEMENT_ID + assert output.session_id == "session_id" + + mock_conn.execute_statement.assert_called_once_with( + Sql=SQL, + StatementName=statement_name, + Parameters=parameters, + WithEvent=False, + SessionId=session_id, + ) + mock_conn.describe_statement.assert_called_once_with( + Id=STATEMENT_ID, + ) + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") def test_batch_execute(self, mock_conn): mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} diff --git a/tests/providers/amazon/aws/operators/test_redshift_data.py 
b/tests/providers/amazon/aws/operators/test_redshift_data.py index abfa2b038b98bf..c22d776a94b448 100644 --- a/tests/providers/amazon/aws/operators/test_redshift_data.py +++ b/tests/providers/amazon/aws/operators/test_redshift_data.py @@ -22,6 +22,7 @@ import pytest from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, TaskDeferred +from airflow.providers.amazon.aws.hooks.redshift_data import QueryExecutionOutput from airflow.providers.amazon.aws.operators.redshift_data import RedshiftDataOperator from airflow.providers.amazon.aws.triggers.redshift_data import RedshiftDataTrigger from tests.providers.amazon.aws.utils.test_template_fields import validate_template_fields @@ -31,6 +32,7 @@ SQL = "sql" DATABASE = "database" STATEMENT_ID = "statement_id" +SESSION_ID = "session_id" @pytest.fixture @@ -98,6 +100,8 @@ def test_execute(self, mock_exec_query): poll_interval = 5 wait_for_completion = True + mock_exec_query.return_value = QueryExecutionOutput(statement_id=STATEMENT_ID, session_id=None) + operator = RedshiftDataOperator( aws_conn_id=CONN_ID, task_id=TASK_ID, @@ -111,7 +115,8 @@ def test_execute(self, mock_exec_query): wait_for_completion=True, poll_interval=poll_interval, ) - operator.execute(None) + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) mock_exec_query.assert_called_once_with( sql=SQL, database=DATABASE, @@ -124,8 +129,12 @@ def test_execute(self, mock_exec_query): with_event=False, wait_for_completion=wait_for_completion, poll_interval=poll_interval, + session_id=None, + session_keep_alive_seconds=None, ) + mock_ti.xcom_push.assert_not_called() + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.execute_query") def test_execute_with_workgroup_name(self, mock_exec_query): cluster_identifier = None @@ -150,7 +159,54 @@ def test_execute_with_workgroup_name(self, mock_exec_query): wait_for_completion=True, poll_interval=poll_interval, ) - operator.execute(None) + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) + mock_exec_query.assert_called_once_with( + sql=SQL, + database=DATABASE, + cluster_identifier=cluster_identifier, + workgroup_name=workgroup_name, + db_user=db_user, + secret_arn=secret_arn, + statement_name=statement_name, + parameters=parameters, + with_event=False, + wait_for_completion=wait_for_completion, + poll_interval=poll_interval, + session_id=None, + session_keep_alive_seconds=None, + ) + + @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.execute_query") + def test_execute_new_session(self, mock_exec_query): + cluster_identifier = "cluster_identifier" + workgroup_name = None + db_user = "db_user" + secret_arn = "secret_arn" + statement_name = "statement_name" + parameters = [{"name": "id", "value": "1"}] + poll_interval = 5 + wait_for_completion = True + + mock_exec_query.return_value = QueryExecutionOutput(statement_id=STATEMENT_ID, session_id=SESSION_ID) + + operator = RedshiftDataOperator( + aws_conn_id=CONN_ID, + task_id=TASK_ID, + sql=SQL, + database=DATABASE, + cluster_identifier=cluster_identifier, + db_user=db_user, + secret_arn=secret_arn, + statement_name=statement_name, + parameters=parameters, + wait_for_completion=True, + poll_interval=poll_interval, + session_keep_alive_seconds=123, + ) + + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) mock_exec_query.assert_called_once_with( sql=SQL, database=DATABASE, @@ -163,7 +219,11 @@ def 
test_execute_with_workgroup_name(self, mock_exec_query): with_event=False, wait_for_completion=wait_for_completion, poll_interval=poll_interval, + session_id=None, + session_keep_alive_seconds=123, ) + assert mock_ti.xcom_push.call_args.kwargs["key"] == "session_id" + assert mock_ti.xcom_push.call_args.kwargs["value"] == SESSION_ID @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") def test_on_kill_without_query(self, mock_conn): @@ -180,7 +240,7 @@ def test_on_kill_without_query(self, mock_conn): @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") def test_on_kill_with_query(self, mock_conn): - mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} + mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID, "SessionId": SESSION_ID} operator = RedshiftDataOperator( aws_conn_id=CONN_ID, task_id=TASK_ID, @@ -189,7 +249,8 @@ def test_on_kill_with_query(self, mock_conn): database=DATABASE, wait_for_completion=False, ) - operator.execute(None) + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) operator.on_kill() mock_conn.cancel_statement.assert_called_once_with( Id=STATEMENT_ID, @@ -198,7 +259,7 @@ def test_on_kill_with_query(self, mock_conn): @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") def test_return_sql_result(self, mock_conn): expected_result = {"Result": True} - mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID} + mock_conn.execute_statement.return_value = {"Id": STATEMENT_ID, "SessionId": SESSION_ID} mock_conn.describe_statement.return_value = {"Status": "FINISHED"} mock_conn.get_statement_result.return_value = expected_result cluster_identifier = "cluster_identifier" @@ -216,7 +277,8 @@ def test_return_sql_result(self, mock_conn): aws_conn_id=CONN_ID, return_sql_result=True, ) - actual_result = operator.execute(None) + mock_ti = mock.MagicMock(name="MockedTaskInstance") + actual_result = operator.execute({"ti": mock_ti}) assert actual_result == expected_result mock_conn.execute_statement.assert_called_once_with( Database=DATABASE, @@ -260,7 +322,9 @@ def test_execute_finished_before_defer(self, mock_exec_query, check_query_is_fin poll_interval=poll_interval, deferrable=True, ) - operator.execute(None) + + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) assert not mock_defer.called mock_exec_query.assert_called_once_with( @@ -275,6 +339,8 @@ def test_execute_finished_before_defer(self, mock_exec_query, check_query_is_fin with_event=False, wait_for_completion=False, poll_interval=poll_interval, + session_id=None, + session_keep_alive_seconds=None, ) @mock.patch( @@ -283,8 +349,9 @@ def test_execute_finished_before_defer(self, mock_exec_query, check_query_is_fin ) @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.execute_query") def test_execute_defer(self, mock_exec_query, check_query_is_finished, deferrable_operator): + mock_ti = mock.MagicMock(name="MockedTaskInstance") with pytest.raises(TaskDeferred) as exc: - deferrable_operator.execute(None) + deferrable_operator.execute({"ti": mock_ti}) assert isinstance(exc.value.trigger, RedshiftDataTrigger) @@ -346,7 +413,8 @@ def test_no_wait_for_completion(self, mock_exec_query, mock_check_query_is_finis poll_interval=poll_interval, deferrable=deferrable, ) - operator.execute(None) + mock_ti = mock.MagicMock(name="MockedTaskInstance") + operator.execute({"ti": mock_ti}) assert not 
mock_check_query_is_finished.called assert not mock_defer.called diff --git a/tests/providers/amazon/aws/sensors/test_dynamodb.py b/tests/providers/amazon/aws/sensors/test_dynamodb.py index d0a7eab8fc3ab8..93ca01d26275d3 100644 --- a/tests/providers/amazon/aws/sensors/test_dynamodb.py +++ b/tests/providers/amazon/aws/sensors/test_dynamodb.py @@ -36,7 +36,7 @@ def setup_method(self): self.attribute_name = "Foo" self.attribute_value = "Bar" - self.sensor = DynamoDBValueSensor( + self.sensor_pk_sk = DynamoDBValueSensor( task_id="dynamodb_value_sensor", table_name=self.table_name, partition_key_name=self.pk_name, @@ -47,6 +47,15 @@ def setup_method(self): sort_key_value=self.sk_value, ) + self.sensor_pk = DynamoDBValueSensor( + task_id="dynamodb_value_sensor", + table_name=self.table_name, + partition_key_name=self.pk_name, + partition_key_value=self.pk_value, + attribute_name=self.attribute_name, + attribute_value=self.attribute_value, + ) + @mock_aws def test_sensor_with_pk(self): hook = DynamoDBHook(table_name=self.table_name, table_keys=[self.pk_name]) @@ -58,12 +67,12 @@ def test_sensor_with_pk(self): ProvisionedThroughput={"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}, ) - assert not self.sensor.poke(None) + assert not self.sensor_pk.poke(None) items = [{self.pk_name: self.pk_value, self.attribute_name: self.attribute_value}] hook.write_batch_data(items) - assert self.sensor.poke(None) + assert self.sensor_pk.poke(None) @mock_aws def test_sensor_with_pk_and_sk(self): @@ -82,7 +91,7 @@ def test_sensor_with_pk_and_sk(self): ProvisionedThroughput={"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}, ) - assert not self.sensor.poke(None) + assert not self.sensor_pk_sk.poke(None) items = [ { @@ -93,7 +102,24 @@ def test_sensor_with_pk_and_sk(self): ] hook.write_batch_data(items) - assert self.sensor.poke(None) + assert self.sensor_pk_sk.poke(None) + + @mock_aws + def test_sensor_with_client_error(self): + hook = DynamoDBHook(table_name=self.table_name, table_keys=[self.pk_name]) + + hook.conn.create_table( + TableName=self.table_name, + KeySchema=[{"AttributeName": self.pk_name, "KeyType": "HASH"}], + AttributeDefinitions=[{"AttributeName": self.pk_name, "AttributeType": "S"}], + ProvisionedThroughput={"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}, + ) + + items = [{self.pk_name: self.pk_value, self.attribute_name: self.attribute_value}] + hook.write_batch_data(items) + + self.sensor_pk.partition_key_name = "no such key" + assert self.sensor_pk.poke(None) is False class TestDynamoDBMultipleValuesSensor: @@ -116,6 +142,14 @@ def setup_method(self): sort_key_name=self.sk_name, sort_key_value=self.sk_value, ) + self.sensor_pk = DynamoDBValueSensor( + task_id="dynamodb_value_sensor", + table_name=self.table_name, + partition_key_name=self.pk_name, + partition_key_value=self.pk_value, + attribute_name=self.attribute_name, + attribute_value=self.attribute_value, + ) def test_init(self): sensor = DynamoDBValueSensor( @@ -167,12 +201,12 @@ def test_sensor_with_pk(self): ProvisionedThroughput={"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}, ) - assert not self.sensor.poke(None) + assert not self.sensor_pk.poke(None) items = [{self.pk_name: self.pk_value, self.attribute_name: self.attribute_value[1]}] hook.write_batch_data(items) - assert self.sensor.poke(None) + assert self.sensor_pk.poke(None) @mock_aws def test_sensor_with_pk_and_sk(self): diff --git a/tests/providers/amazon/aws/utils/test_openlineage.py b/tests/providers/amazon/aws/utils/test_openlineage.py index 
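
The DynamoDB sensor tests above split the fixture into a partition-key-only sensor and a partition-plus-sort-key sensor, and add a case where a bad key name (a `ClientError` from DynamoDB) makes `poke()` return `False` rather than raise. A short usage sketch of the partition-key-only form, with table and key names as placeholders:

```python
from airflow.providers.amazon.aws.sensors.dynamodb import DynamoDBValueSensor

wait_for_value = DynamoDBValueSensor(
    task_id="dynamodb_value_sensor",
    table_name="test_airflow",     # placeholder table
    partition_key_name="PK",       # placeholder key schema
    partition_key_value="Test",
    attribute_name="Foo",
    attribute_value="Bar",
    # sort_key_name / sort_key_value stay unset for a table with no sort key
)
```
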
b3e820b58185ea..195db068d3092d 100644 --- a/tests/providers/amazon/aws/utils/test_openlineage.py +++ b/tests/providers/amazon/aws/utils/test_openlineage.py @@ -21,7 +21,7 @@ import pytest -from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook +from airflow.providers.amazon.aws.hooks.redshift_data import QueryExecutionOutput, RedshiftDataHook from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook from airflow.providers.amazon.aws.utils.openlineage import ( get_facets_from_redshift_table, @@ -58,7 +58,7 @@ def test_get_facets_from_redshift_table_sql_hook(mock_get_records): @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.execute_query") @mock.patch("airflow.providers.amazon.aws.hooks.redshift_data.RedshiftDataHook.conn") def test_get_facets_from_redshift_table_data_hook(mock_connection, mock_execute_query): - mock_execute_query.return_value = "statement_id" + mock_execute_query.return_value = QueryExecutionOutput(statement_id="statement_id", session_id=None) mock_connection.get_statement_result.return_value = { "Records": [ [ diff --git a/tests/providers/apache/beam/operators/test_beam.py b/tests/providers/apache/beam/operators/test_beam.py index 33e2e6dfe714aa..6d1b4b5d1b958e 100644 --- a/tests/providers/apache/beam/operators/test_beam.py +++ b/tests/providers/apache/beam/operators/test_beam.py @@ -257,7 +257,9 @@ def test_on_kill_dataflow_runner(self, dataflow_hook_mock, _, __, ___): op.dataflow_job_id = JOB_ID op.on_kill() - dataflow_cancel_job.assert_called_once_with(job_id=JOB_ID, project_id=op.dataflow_config.project_id) + dataflow_cancel_job.assert_called_once_with( + job_id=JOB_ID, project_id=op.dataflow_config.project_id, location=op.dataflow_config.location + ) @mock.patch(BEAM_OPERATOR_PATH.format("BeamHook")) @mock.patch(BEAM_OPERATOR_PATH.format("DataflowHook")) @@ -465,7 +467,9 @@ def test_on_kill_dataflow_runner(self, dataflow_hook_mock, _, __, ___): op.dataflow_job_id = JOB_ID op.on_kill() - dataflow_cancel_job.assert_called_once_with(job_id=JOB_ID, project_id=op.dataflow_config.project_id) + dataflow_cancel_job.assert_called_once_with( + job_id=JOB_ID, project_id=op.dataflow_config.project_id, location=op.dataflow_config.location + ) @mock.patch(BEAM_OPERATOR_PATH.format("BeamHook")) @mock.patch(BEAM_OPERATOR_PATH.format("DataflowHook")) @@ -859,7 +863,9 @@ def test_on_kill_dataflow_runner(self, dataflow_hook_mock, _, __, ___): op.dataflow_job_id = JOB_ID op.on_kill() - dataflow_cancel_job.assert_called_once_with(job_id=JOB_ID, project_id=op.dataflow_config.project_id) + dataflow_cancel_job.assert_called_once_with( + job_id=JOB_ID, project_id=op.dataflow_config.project_id, location=op.dataflow_config.location + ) @mock.patch(BEAM_OPERATOR_PATH.format("BeamHook")) @mock.patch(BEAM_OPERATOR_PATH.format("DataflowHook")) @@ -989,7 +995,9 @@ def test_on_kill_dataflow_runner(self, dataflow_hook_mock, _, __, ___): op.execute(context=mock.MagicMock()) op.dataflow_job_id = JOB_ID op.on_kill() - dataflow_cancel_job.assert_called_once_with(job_id=JOB_ID, project_id=op.dataflow_config.project_id) + dataflow_cancel_job.assert_called_once_with( + job_id=JOB_ID, project_id=op.dataflow_config.project_id, location=op.dataflow_config.location + ) @mock.patch(BEAM_OPERATOR_PATH.format("BeamHook")) @mock.patch(BEAM_OPERATOR_PATH.format("DataflowHook")) @@ -1108,7 +1116,9 @@ def test_on_kill_dataflow_runner(self, dataflow_hook_mock, _, __, ___): op.execute(context=mock.MagicMock()) op.dataflow_job_id = JOB_ID op.on_kill() 
- dataflow_cancel_job.assert_called_once_with(job_id=JOB_ID, project_id=op.dataflow_config.project_id) + dataflow_cancel_job.assert_called_once_with( + job_id=JOB_ID, project_id=op.dataflow_config.project_id, location=op.dataflow_config.location + ) @mock.patch(BEAM_OPERATOR_PATH.format("BeamHook")) @mock.patch(BEAM_OPERATOR_PATH.format("DataflowHook")) diff --git a/tests/providers/edge/cli/__init__.py b/tests/providers/edge/cli/__init__.py new file mode 100644 index 00000000000000..217e5db9607827 --- /dev/null +++ b/tests/providers/edge/cli/__init__.py @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/edge/cli/test_edge_command.py b/tests/providers/edge/cli/test_edge_command.py new file mode 100644 index 00000000000000..398c221db02f9c --- /dev/null +++ b/tests/providers/edge/cli/test_edge_command.py @@ -0,0 +1,259 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
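
The new `tests/providers/edge/cli/test_edge_command.py` below drives the Edge worker CLI (`_EdgeWorkerCli`) entirely through mocks: every API-bound call is wrapped in `conf_vars` so `[edge] api_url` points at a fake server, and `time_machine` freezes the clock so pushed log chunks can be matched on an exact timestamp. A minimal, self-contained sketch of that test pattern (only the pattern is shown, not the worker itself):

```python
from __future__ import annotations

from datetime import datetime

import time_machine

from tests.test_utils.config import conf_vars


@time_machine.travel(datetime.now(), tick=False)  # the clock is frozen for the whole test
def test_pattern_frozen_clock_and_patched_config():
    with conf_vars({("edge", "api_url"): "https://mock.server"}):
        # Code under test reading conf.get("edge", "api_url") sees the patched value,
        # and datetime.now() is deterministic, so recorded timestamps can be compared
        # against datetime.now() directly, as the log-push tests below do.
        assert datetime.now() == datetime.now()
```

The fixtures further down additionally stub `subprocess.Popen`, so no real task process is ever started.
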
+from __future__ import annotations + +from datetime import datetime +from pathlib import Path +from subprocess import Popen +from unittest.mock import patch + +import pytest +import time_machine + +from airflow.exceptions import AirflowException +from airflow.providers.edge.cli.edge_command import ( + _EdgeWorkerCli, + _get_sysinfo, + _Job, +) +from airflow.providers.edge.models.edge_job import EdgeJob +from airflow.providers.edge.models.edge_worker import EdgeWorker, EdgeWorkerState +from airflow.utils.state import TaskInstanceState +from tests.test_utils.config import conf_vars + +pytest.importorskip("pydantic", minversion="2.0.0") + +# Ignore the following error for mocking +# mypy: disable-error-code="attr-defined" + + +def test_get_sysinfo(): + sysinfo = _get_sysinfo() + assert "airflow_version" in sysinfo + assert "edge_provider_version" in sysinfo + + +class TestEdgeWorkerCli: + @pytest.fixture + def dummy_joblist(self, tmp_path: Path) -> list[_Job]: + logfile = tmp_path / "file.log" + logfile.touch() + + class MockPopen(Popen): + generated_returncode = None + + def __init__(self): + pass + + def poll(self): + pass + + @property + def returncode(self): + return self.generated_returncode + + return [ + _Job( + edge_job=EdgeJob( + dag_id="test", + task_id="test1", + run_id="test", + map_index=-1, + try_number=1, + state=TaskInstanceState.RUNNING, + queue="test", + command=["test", "command"], + queued_dttm=datetime.now(), + edge_worker=None, + last_update=None, + ), + process=MockPopen(), + logfile=logfile, + logsize=0, + ), + ] + + @pytest.fixture + def worker_with_job(self, tmp_path: Path, dummy_joblist: list[_Job]) -> _EdgeWorkerCli: + test_worker = _EdgeWorkerCli(tmp_path / "dummy.pid", "dummy", None, 8, 5, 5) + test_worker.jobs = dummy_joblist + return test_worker + + @pytest.mark.parametrize( + "reserve_result, fetch_result, expected_calls", + [ + pytest.param(None, False, (0, 0), id="no_job"), + pytest.param( + EdgeJob( + dag_id="test", + task_id="test", + run_id="test", + map_index=-1, + try_number=1, + state=TaskInstanceState.QUEUED, + queue="test", + command=["test", "command"], + queued_dttm=datetime.now(), + edge_worker=None, + last_update=None, + ), + True, + (1, 1), + id="new_job", + ), + ], + ) + @patch("airflow.providers.edge.models.edge_job.EdgeJob.reserve_task") + @patch("airflow.providers.edge.models.edge_logs.EdgeLogs.logfile_path") + @patch("airflow.providers.edge.models.edge_job.EdgeJob.set_state") + @patch("subprocess.Popen") + def test_fetch_job( + self, + mock_popen, + mock_set_state, + mock_logfile_path, + mock_reserve_task, + reserve_result, + fetch_result, + expected_calls, + worker_with_job: _EdgeWorkerCli, + ): + logfile_path_call_count, set_state_call_count = expected_calls + mock_reserve_task.side_effect = [reserve_result] + mock_popen.side_effect = ["dummy"] + with conf_vars({("edge", "api_url"): "https://mock.server"}): + got_job = worker_with_job.fetch_job() + mock_reserve_task.assert_called_once() + assert got_job == fetch_result + assert mock_logfile_path.call_count == logfile_path_call_count + assert mock_set_state.call_count == set_state_call_count + + def test_check_running_jobs_running(self, worker_with_job: _EdgeWorkerCli): + worker_with_job.jobs[0].process.generated_returncode = None + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.check_running_jobs() + assert len(worker_with_job.jobs) == 1 + + @patch("airflow.providers.edge.models.edge_job.EdgeJob.set_state") + def test_check_running_jobs_success(self, 
mock_set_state, worker_with_job: _EdgeWorkerCli): + job = worker_with_job.jobs[0] + job.process.generated_returncode = 0 + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.check_running_jobs() + assert len(worker_with_job.jobs) == 0 + mock_set_state.assert_called_once_with(job.edge_job.key, TaskInstanceState.SUCCESS) + + @patch("airflow.providers.edge.models.edge_job.EdgeJob.set_state") + def test_check_running_jobs_failed(self, mock_set_state, worker_with_job: _EdgeWorkerCli): + job = worker_with_job.jobs[0] + job.process.generated_returncode = 42 + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.check_running_jobs() + assert len(worker_with_job.jobs) == 0 + mock_set_state.assert_called_once_with(job.edge_job.key, TaskInstanceState.FAILED) + + @time_machine.travel(datetime.now(), tick=False) + @patch("airflow.providers.edge.models.edge_logs.EdgeLogs.push_logs") + def test_check_running_jobs_log_push(self, mock_push_logs, worker_with_job: _EdgeWorkerCli): + job = worker_with_job.jobs[0] + job.process.generated_returncode = None + job.logfile.write_text("some log content") + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.check_running_jobs() + assert len(worker_with_job.jobs) == 1 + mock_push_logs.assert_called_once_with( + task=job.edge_job.key, log_chunk_time=datetime.now(), log_chunk_data="some log content" + ) + + @time_machine.travel(datetime.now(), tick=False) + @patch("airflow.providers.edge.models.edge_logs.EdgeLogs.push_logs") + def test_check_running_jobs_log_push_increment(self, mock_push_logs, worker_with_job: _EdgeWorkerCli): + job = worker_with_job.jobs[0] + job.process.generated_returncode = None + job.logfile.write_text("hello ") + job.logsize = job.logfile.stat().st_size + job.logfile.write_text("hello world") + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.check_running_jobs() + assert len(worker_with_job.jobs) == 1 + mock_push_logs.assert_called_once_with( + task=job.edge_job.key, log_chunk_time=datetime.now(), log_chunk_data="world" + ) + + @pytest.mark.parametrize( + "drain, jobs, expected_state", + [ + pytest.param(False, True, EdgeWorkerState.RUNNING, id="running_jobs"), + pytest.param(True, True, EdgeWorkerState.TERMINATING, id="shutting_down"), + pytest.param(False, False, EdgeWorkerState.IDLE, id="idle"), + ], + ) + @patch("airflow.providers.edge.models.edge_worker.EdgeWorker.set_state") + def test_heartbeat(self, mock_set_state, drain, jobs, expected_state, worker_with_job: _EdgeWorkerCli): + if not jobs: + worker_with_job.jobs = [] + _EdgeWorkerCli.drain = drain + with conf_vars({("edge", "api_url"): "https://mock.server"}): + worker_with_job.heartbeat() + assert mock_set_state.call_args.args[1] == expected_state + + @patch("airflow.providers.edge.models.edge_worker.EdgeWorker.register_worker") + def test_start_missing_apiserver(self, mock_register_worker, worker_with_job: _EdgeWorkerCli): + mock_register_worker.side_effect = AirflowException( + "Something with 404:NOT FOUND means API is not active" + ) + with pytest.raises(SystemExit, match=r"API endpoint is not ready"): + worker_with_job.start() + + @patch("airflow.providers.edge.models.edge_worker.EdgeWorker.register_worker") + def test_start_server_error(self, mock_register_worker, worker_with_job: _EdgeWorkerCli): + mock_register_worker.side_effect = AirflowException("Something other error not FourhundretFour") + with pytest.raises(SystemExit, match=r"Something other"): + 
worker_with_job.start() + + @patch("airflow.providers.edge.models.edge_worker.EdgeWorker.register_worker") + @patch("airflow.providers.edge.cli.edge_command._EdgeWorkerCli.loop") + @patch("airflow.providers.edge.models.edge_worker.EdgeWorker.set_state") + def test_start_and_run_one( + self, mock_set_state, mock_loop, mock_register_worker, worker_with_job: _EdgeWorkerCli + ): + mock_register_worker.side_effect = [ + EdgeWorker( + worker_name="test", + state=EdgeWorkerState.STARTING, + queues=None, + first_online=datetime.now(), + last_update=datetime.now(), + jobs_active=0, + jobs_taken=0, + jobs_success=0, + jobs_failed=0, + sysinfo="", + ) + ] + + def stop_running(): + _EdgeWorkerCli.drain = True + worker_with_job.jobs = [] + + mock_loop.side_effect = stop_running + + worker_with_job.start() + + mock_register_worker.assert_called_once() + mock_loop.assert_called_once() + mock_set_state.assert_called_once() diff --git a/tests/providers/edge/models/test_edge_worker.py b/tests/providers/edge/models/test_edge_worker.py index 9eca293bafe3f6..f0e0ac9dfa0568 100644 --- a/tests/providers/edge/models/test_edge_worker.py +++ b/tests/providers/edge/models/test_edge_worker.py @@ -20,11 +20,14 @@ import pytest +from airflow.providers.edge.cli.edge_command import _get_sysinfo from airflow.providers.edge.models.edge_worker import ( EdgeWorker, EdgeWorkerModel, + EdgeWorkerState, EdgeWorkerVersionException, ) +from airflow.utils import timezone if TYPE_CHECKING: from sqlalchemy.orm import Session @@ -63,3 +66,29 @@ def test_assert_version(self): EdgeWorker.assert_version( {"airflow_version": airflow_version, "edge_provider_version": edge_provider_version} ) + + def test_register_worker(self, session: Session): + EdgeWorker.register_worker( + "test_worker", EdgeWorkerState.STARTING, queues=None, sysinfo=_get_sysinfo() + ) + + worker: list[EdgeWorkerModel] = session.query(EdgeWorkerModel).all() + assert len(worker) == 1 + assert worker[0].worker_name == "test_worker" + + def test_set_state(self, session: Session): + rwm = EdgeWorkerModel( + worker_name="test2_worker", + state=EdgeWorkerState.IDLE, + queues=["default"], + first_online=timezone.utcnow(), + ) + session.add(rwm) + session.commit() + + EdgeWorker.set_state("test2_worker", EdgeWorkerState.RUNNING, 1, _get_sysinfo()) + + worker: list[EdgeWorkerModel] = session.query(EdgeWorkerModel).all() + assert len(worker) == 1 + assert worker[0].worker_name == "test2_worker" + assert worker[0].state == EdgeWorkerState.RUNNING diff --git a/tests/providers/google/cloud/operators/test_dataflow.py b/tests/providers/google/cloud/operators/test_dataflow.py index 14787dba19b61a..4263d3300f11bc 100644 --- a/tests/providers/google/cloud/operators/test_dataflow.py +++ b/tests/providers/google/cloud/operators/test_dataflow.py @@ -315,7 +315,7 @@ def test_check_job_running_exec(self, gcs_hook, dataflow_mock, beam_hook_mock): "output": "gs://test/output", "labels": {"foo": "bar", "airflow-version": self.expected_airflow_version}, } - dataflow_running.assert_called_once_with(name=JOB_NAME, variables=variables) + dataflow_running.assert_called_once_with(name=JOB_NAME, variables=variables, location=TEST_LOCATION) @mock.patch( "airflow.providers.google.cloud.operators.dataflow.process_line_and_extract_dataflow_job_id_callback" diff --git a/tests/providers/microsoft/azure/operators/test_msgraph.py b/tests/providers/microsoft/azure/operators/test_msgraph.py index b7520d731544c9..754b653ccdaf08 100644 --- a/tests/providers/microsoft/azure/operators/test_msgraph.py +++ 
b/tests/providers/microsoft/azure/operators/test_msgraph.py @@ -26,7 +26,13 @@ from airflow.providers.microsoft.azure.operators.msgraph import MSGraphAsyncOperator from airflow.triggers.base import TriggerEvent from tests.providers.microsoft.azure.base import Base -from tests.providers.microsoft.conftest import load_file, load_json, mock_json_response, mock_response +from tests.providers.microsoft.conftest import ( + load_file, + load_json, + mock_context, + mock_json_response, + mock_response, +) class TestMSGraphAsyncOperator(Base): @@ -127,3 +133,31 @@ def test_template_fields(self): for template_field in MSGraphAsyncOperator.template_fields: getattr(operator, template_field) + + def test_paginate_without_query_parameters(self): + operator = MSGraphAsyncOperator( + task_id="user_license_details", + conn_id="msgraph_api", + url="users", + ) + context = mock_context(task=operator) + response = load_json("resources", "users.json") + next_link, query_parameters = MSGraphAsyncOperator.paginate(operator, response, context) + + assert next_link == response["@odata.nextLink"] + assert query_parameters is None + + def test_paginate_with_context_query_parameters(self): + operator = MSGraphAsyncOperator( + task_id="user_license_details", + conn_id="msgraph_api", + url="users", + query_parameters={"$top": 12}, + ) + context = mock_context(task=operator) + response = load_json("resources", "users.json") + response["@odata.count"] = 100 + url, query_parameters = MSGraphAsyncOperator.paginate(operator, response, context) + + assert url == "users" + assert query_parameters == {"$skip": 12, "$top": 12} diff --git a/tests/providers/opensearch/conftest.py b/tests/providers/opensearch/conftest.py index 35291b0d6e2eb7..8edb688068bd13 100644 --- a/tests/providers/opensearch/conftest.py +++ b/tests/providers/opensearch/conftest.py @@ -50,7 +50,7 @@ def index(self, document: dict, index_name: str, doc_id: int, **kwargs: Any) -> return doc_id -class MockClient(OpenSearch): +class MockClient: def count(self, index: Any = None, body: Any = None): return {"count": 1, "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0}} diff --git a/tests/providers/opensearch/log/test_os_json_formatter.py b/tests/providers/opensearch/log/test_os_json_formatter.py index 27cf2be2116302..bae039e199bdb2 100644 --- a/tests/providers/opensearch/log/test_os_json_formatter.py +++ b/tests/providers/opensearch/log/test_os_json_formatter.py @@ -24,6 +24,8 @@ import pendulum import pytest +opensearchpy = pytest.importorskip("opensearchpy") + from airflow.providers.opensearch.log.os_task_handler import ( OpensearchJSONFormatter, ) diff --git a/tests/providers/opensearch/log/test_os_response.py b/tests/providers/opensearch/log/test_os_response.py index d4c1858d37a921..5b2f36d3c21b9f 100644 --- a/tests/providers/opensearch/log/test_os_response.py +++ b/tests/providers/opensearch/log/test_os_response.py @@ -22,6 +22,8 @@ import pytest +opensearchpy = pytest.importorskip("opensearchpy") + from airflow.providers.opensearch.log.os_response import ( AttributeList, Hit, diff --git a/tests/providers/opensearch/log/test_os_task_handler.py b/tests/providers/opensearch/log/test_os_task_handler.py index 27f2ac77283800..d23249ba9e6d9a 100644 --- a/tests/providers/opensearch/log/test_os_task_handler.py +++ b/tests/providers/opensearch/log/test_os_task_handler.py @@ -29,7 +29,8 @@ import pendulum import pytest -from opensearchpy import OpenSearch + +opensearchpy = pytest.importorskip("opensearchpy") from opensearchpy.exceptions import 
NotFoundError from airflow.configuration import conf @@ -112,11 +113,6 @@ def setup_method(self): ) self.os_task_handler.client = MockClient() - # self.index_name = "test_index" - # self.doc_type = "log" - # self.test_message = "some random stuff" - # self.body = {"message": self.test_message, "log_id": self.LOG_ID, "offset": 1} - # self.os.index(index=self.index_name, doc_type=self.doc_type, body=self.body, id=1) def teardown_method(self): shutil.rmtree(self.local_log_location.split(os.path.sep)[0], ignore_errors=True) @@ -141,7 +137,6 @@ def concat_logs(lines): ) def test_client(self): - assert issubclass(type(self.os_task_handler.client), OpenSearch) assert self.os_task_handler.index_patterns == "_all" def test_client_with_config(self): diff --git a/tests/serialization/test_dag_serialization.py b/tests/serialization/test_dag_serialization.py index 7dfe57054c60fb..758c7f496ed93d 100644 --- a/tests/serialization/test_dag_serialization.py +++ b/tests/serialization/test_dag_serialization.py @@ -143,7 +143,6 @@ def detect_task_dependencies(task: Operator) -> DagDependency | None: # type: i "retries": 1, "retry_delay": {"__type": "timedelta", "__var": 300.0}, "max_retry_delay": {"__type": "timedelta", "__var": 600.0}, - "sla": {"__type": "timedelta", "__var": 100.0}, }, }, "start_date": 1564617600.0, @@ -179,7 +178,6 @@ def detect_task_dependencies(task: Operator) -> DagDependency | None: # type: i "retries": 1, "retry_delay": 300.0, "max_retry_delay": 600.0, - "sla": 100.0, "downstream_task_ids": [], "_is_empty": False, "ui_color": "#f0ede4", @@ -218,7 +216,6 @@ def detect_task_dependencies(task: Operator) -> DagDependency | None: # type: i "retries": 1, "retry_delay": 300.0, "max_retry_delay": 600.0, - "sla": 100.0, "downstream_task_ids": [], "_is_empty": False, "_operator_extra_links": [{"tests.test_utils.mock_operators.CustomOpLink": {}}], @@ -290,7 +287,6 @@ def make_simple_dag(): "retry_delay": timedelta(minutes=5), "max_retry_delay": timedelta(minutes=10), "depends_on_past": False, - "sla": timedelta(seconds=100), }, start_date=datetime(2019, 8, 1), is_paused_upon_creation=False, @@ -1299,7 +1295,6 @@ def test_no_new_fields_added_to_base_operator(self): "retry_delay": timedelta(0, 300), "retry_exponential_backoff": False, "run_as_user": None, - "sla": None, "task_id": "10", "trigger_rule": "all_success", "wait_for_downstream": False, diff --git a/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py index 224ddc21d8e87a..a1d1211da4c466 100644 --- a/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py +++ b/tests/system/providers/amazon/aws/example_bedrock_retrieve_and_generate.py @@ -261,7 +261,10 @@ def create_vector_index(index_name: str, collection_id: str, region: str): ) log.debug(e) retries -= 1 - sleep(2) + if retries: + sleep(2) + else: + raise @task diff --git a/tests/system/providers/amazon/aws/example_redshift.py b/tests/system/providers/amazon/aws/example_redshift.py index cc92076dcba0a6..67b822d41ef55c 100644 --- a/tests/system/providers/amazon/aws/example_redshift.py +++ b/tests/system/providers/amazon/aws/example_redshift.py @@ -50,7 +50,6 @@ DB_NAME = "dev" POLL_INTERVAL = 10 - with DAG( dag_id=DAG_ID, start_date=datetime(2021, 1, 1), @@ -175,6 +174,37 @@ wait_for_completion=True, ) + # [START howto_operator_redshift_data_session_reuse] + create_tmp_table_data_api = RedshiftDataOperator( + task_id="create_tmp_table_data_api", + 
cluster_identifier=redshift_cluster_identifier, + database=DB_NAME, + db_user=DB_LOGIN, + sql=""" + CREATE TEMPORARY TABLE tmp_people ( + id INTEGER, + first_name VARCHAR(100), + age INTEGER + ); + """, + poll_interval=POLL_INTERVAL, + wait_for_completion=True, + session_keep_alive_seconds=600, + ) + + insert_data_reuse_session = RedshiftDataOperator( + task_id="insert_data_reuse_session", + sql=""" + INSERT INTO tmp_people VALUES ( 1, 'Bob', 30); + INSERT INTO tmp_people VALUES ( 2, 'Alice', 35); + INSERT INTO tmp_people VALUES ( 3, 'Charlie', 40); + """, + poll_interval=POLL_INTERVAL, + wait_for_completion=True, + session_id="{{ task_instance.xcom_pull(task_ids='create_tmp_table_data_api', key='session_id') }}", + ) + # [END howto_operator_redshift_data_session_reuse] + # [START howto_operator_redshift_delete_cluster] delete_cluster = RedshiftDeleteClusterOperator( task_id="delete_cluster", @@ -209,13 +239,20 @@ delete_cluster, ) + # Test session reuse in parallel + chain( + wait_cluster_available_after_resume, + create_tmp_table_data_api, + insert_data_reuse_session, + delete_cluster_snapshot, + ) + from tests.system.utils.watcher import watcher # This test needs watcher in order to properly mark success/failure # when "tearDown" task with trigger rule is part of the DAG list(dag.tasks) >> watcher() - from tests.system.utils import get_test_run # noqa: E402 # Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) diff --git a/tests/system/providers/amazon/aws/example_redshift_s3_transfers.py b/tests/system/providers/amazon/aws/example_redshift_s3_transfers.py index 06910461905076..9fb989ec53697d 100644 --- a/tests/system/providers/amazon/aws/example_redshift_s3_transfers.py +++ b/tests/system/providers/amazon/aws/example_redshift_s3_transfers.py @@ -53,22 +53,34 @@ S3_KEY = "s3_output_" S3_KEY_2 = "s3_key_2" +S3_KEY_3 = "s3_output_tmp_table_" S3_KEY_PREFIX = "s3_k" REDSHIFT_TABLE = "test_table" +REDSHIFT_TMP_TABLE = "tmp_table" -SQL_CREATE_TABLE = f""" - CREATE TABLE IF NOT EXISTS {REDSHIFT_TABLE} ( - fruit_id INTEGER, - name VARCHAR NOT NULL, - color VARCHAR NOT NULL - ); -""" +DATA = "0, 'Airflow', 'testing'" -SQL_INSERT_DATA = f"INSERT INTO {REDSHIFT_TABLE} VALUES ( 1, 'Banana', 'Yellow');" -SQL_DROP_TABLE = f"DROP TABLE IF EXISTS {REDSHIFT_TABLE};" +def _drop_table(table_name: str) -> str: + return f"DROP TABLE IF EXISTS {table_name};" -DATA = "0, 'Airflow', 'testing'" + +def _create_table(table_name: str, is_temp: bool = False) -> str: + temp_keyword = "TEMPORARY" if is_temp else "" + return ( + _drop_table(table_name) + + f""" + CREATE {temp_keyword} TABLE {table_name} ( + fruit_id INTEGER, + name VARCHAR NOT NULL, + color VARCHAR NOT NULL + ); + """ + ) + + +def _insert_data(table_name: str) -> str: + return f"INSERT INTO {table_name} VALUES ( 1, 'Banana', 'Yellow');" with DAG( @@ -124,7 +136,7 @@ cluster_identifier=redshift_cluster_identifier, database=DB_NAME, db_user=DB_LOGIN, - sql=SQL_CREATE_TABLE, + sql=_create_table(REDSHIFT_TABLE), wait_for_completion=True, ) @@ -133,7 +145,7 @@ cluster_identifier=redshift_cluster_identifier, database=DB_NAME, db_user=DB_LOGIN, - sql=SQL_INSERT_DATA, + sql=_insert_data(REDSHIFT_TABLE), wait_for_completion=True, ) @@ -159,6 +171,33 @@ bucket_key=f"{S3_KEY}/{REDSHIFT_TABLE}_0000_part_00", ) + create_tmp_table = RedshiftDataOperator( + task_id="create_tmp_table", + cluster_identifier=redshift_cluster_identifier, + database=DB_NAME, + db_user=DB_LOGIN, + sql=_create_table(REDSHIFT_TMP_TABLE, is_temp=True) + 
_insert_data(REDSHIFT_TMP_TABLE), + wait_for_completion=True, + session_keep_alive_seconds=600, + ) + + transfer_redshift_to_s3_reuse_session = RedshiftToS3Operator( + task_id="transfer_redshift_to_s3_reuse_session", + redshift_data_api_kwargs={ + "wait_for_completion": True, + "session_id": "{{ task_instance.xcom_pull(task_ids='create_tmp_table', key='session_id') }}", + }, + s3_bucket=bucket_name, + s3_key=S3_KEY_3, + table=REDSHIFT_TMP_TABLE, + ) + + check_if_tmp_table_key_exists = S3KeySensor( + task_id="check_if_tmp_table_key_exists", + bucket_name=bucket_name, + bucket_key=f"{S3_KEY_3}/{REDSHIFT_TMP_TABLE}_0000_part_00", + ) + # [START howto_transfer_s3_to_redshift] transfer_s3_to_redshift = S3ToRedshiftOperator( task_id="transfer_s3_to_redshift", @@ -176,6 +215,28 @@ ) # [END howto_transfer_s3_to_redshift] + create_dest_tmp_table = RedshiftDataOperator( + task_id="create_dest_tmp_table", + cluster_identifier=redshift_cluster_identifier, + database=DB_NAME, + db_user=DB_LOGIN, + sql=_create_table(REDSHIFT_TMP_TABLE, is_temp=True), + wait_for_completion=True, + session_keep_alive_seconds=600, + ) + + transfer_s3_to_redshift_tmp_table = S3ToRedshiftOperator( + task_id="transfer_s3_to_redshift_tmp_table", + redshift_data_api_kwargs={ + "session_id": "{{ task_instance.xcom_pull(task_ids='create_dest_tmp_table', key='session_id') }}", + "wait_for_completion": True, + }, + s3_bucket=bucket_name, + s3_key=S3_KEY_2, + table=REDSHIFT_TMP_TABLE, + copy_options=["csv"], + ) + # [START howto_transfer_s3_to_redshift_multiple_keys] transfer_s3_to_redshift_multiple = S3ToRedshiftOperator( task_id="transfer_s3_to_redshift_multiple", @@ -198,7 +259,7 @@ cluster_identifier=redshift_cluster_identifier, database=DB_NAME, db_user=DB_LOGIN, - sql=SQL_DROP_TABLE, + sql=_drop_table(REDSHIFT_TABLE), wait_for_completion=True, trigger_rule=TriggerRule.ALL_DONE, ) @@ -235,13 +296,33 @@ delete_bucket, ) + chain( + # TEST SETUP + wait_cluster_available, + create_tmp_table, + # TEST BODY + transfer_redshift_to_s3_reuse_session, + check_if_tmp_table_key_exists, + # TEST TEARDOWN + delete_cluster, + ) + + chain( + # TEST SETUP + wait_cluster_available, + create_dest_tmp_table, + # TEST BODY + transfer_s3_to_redshift_tmp_table, + # TEST TEARDOWN + delete_cluster, + ) + from tests.system.utils.watcher import watcher # This test needs watcher in order to properly mark success/failure # when "tearDown" task with trigger rule is part of the DAG list(dag.tasks) >> watcher() - from tests.system.utils import get_test_run # noqa: E402 # Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) diff --git a/tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py b/tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py index d4b0d39e2e2d86..41d374f71488c8 100644 --- a/tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py +++ b/tests/system/providers/google/cloud/cloud_sql/example_cloud_sql.py @@ -62,8 +62,6 @@ FILE_URI = f"gs://{BUCKET_NAME}/{FILE_NAME}" FILE_URI_DEFERRABLE = f"gs://{BUCKET_NAME}/{FILE_NAME_DEFERRABLE}" -FAILOVER_REPLICA_NAME = f"{INSTANCE_NAME}-failover-replica" -READ_REPLICA_NAME = f"{INSTANCE_NAME}-read-replica" CLONED_INSTANCE_NAME = f"{INSTANCE_NAME}-clone" # Bodies below represent Cloud SQL instance resources: @@ -86,30 +84,15 @@ "locationPreference": {"zone": "europe-west4-a"}, "maintenanceWindow": {"hour": 5, "day": 7, "updateTrack": "canary"}, "pricingPlan": "PER_USE", - "replicationType": "ASYNCHRONOUS", "storageAutoResize": True, 
"storageAutoResizeLimit": 0, "userLabels": {"my-key": "my-value"}, }, - "failoverReplica": {"name": FAILOVER_REPLICA_NAME}, "databaseVersion": "MYSQL_5_7", "region": "europe-west4", } # [END howto_operator_cloudsql_create_body] -# [START howto_operator_cloudsql_create_replica] -read_replica_body = { - "name": READ_REPLICA_NAME, - "settings": { - "tier": "db-n1-standard-1", - }, - "databaseVersion": "MYSQL_5_7", - "region": "europe-west4", - "masterInstanceName": INSTANCE_NAME, -} -# [END howto_operator_cloudsql_create_replica] - - # [START howto_operator_cloudsql_patch_body] patch_body = { "name": INSTANCE_NAME, @@ -169,12 +152,6 @@ ) # [END howto_operator_cloudsql_create] - sql_instance_read_replica_create = CloudSQLCreateInstanceOperator( - body=read_replica_body, - instance=READ_REPLICA_NAME, - task_id="sql_instance_read_replica_create", - ) - # ############################################## # # ### MODIFYING INSTANCE AND ITS DATABASE ###### # # ############################################## # @@ -277,20 +254,6 @@ # ### INSTANCES TEAR DOWN ###################### # # ############################################## # - # [START howto_operator_cloudsql_replicas_delete] - sql_instance_failover_replica_delete_task = CloudSQLDeleteInstanceOperator( - instance=FAILOVER_REPLICA_NAME, - task_id="sql_instance_failover_replica_delete_task", - trigger_rule=TriggerRule.ALL_DONE, - ) - - sql_instance_read_replica_delete_task = CloudSQLDeleteInstanceOperator( - instance=READ_REPLICA_NAME, - task_id="sql_instance_read_replica_delete_task", - trigger_rule=TriggerRule.ALL_DONE, - ) - # [END howto_operator_cloudsql_replicas_delete] - sql_instance_clone_delete_task = CloudSQLDeleteInstanceOperator( instance=CLONED_INSTANCE_NAME, task_id="sql_instance_clone_delete_task", @@ -312,7 +275,6 @@ create_bucket # TEST BODY >> sql_instance_create_task - >> sql_instance_read_replica_create >> sql_instance_patch_task >> sql_db_create_task >> sql_db_patch_task @@ -323,8 +285,6 @@ >> sql_import_task >> sql_instance_clone >> sql_db_delete_task - >> sql_instance_failover_replica_delete_task - >> sql_instance_read_replica_delete_task >> sql_instance_clone_delete_task >> sql_instance_delete_task # TEST TEARDOWN