From 79abe63ed851ffdbba7d6de46fbfc9786735d012 Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Wed, 4 Jun 2025 16:02:18 -0700
Subject: [PATCH 01/41] wip

---
 .../blue_green_plugin.py                      | 358 ++++++++++++++++++
 aws_advanced_python_wrapper/plugin_service.py |  28 +-
 ...dvanced_python_wrapper_messages.properties |  48 +++
 tests/unit/test_blue_green_plugin.py          |  51 +++
 4 files changed, 484 insertions(+), 1 deletion(-)
 create mode 100644 aws_advanced_python_wrapper/blue_green_plugin.py
 create mode 100644 tests/unit/test_blue_green_plugin.py

diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py
new file mode 100644
index 00000000..9ff14d39
--- /dev/null
+++ b/aws_advanced_python_wrapper/blue_green_plugin.py
@@ -0,0 +1,358 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import time
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from enum import Enum, auto
+from threading import Condition
+from types import MappingProxyType
+from typing import Optional, Tuple, Any, Callable, Dict, Set, NoReturn, ClassVar
+
+from aws_advanced_python_wrapper.errors import AwsWrapperError, UnsupportedOperationError
+from aws_advanced_python_wrapper.hostinfo import HostInfo
+from aws_advanced_python_wrapper.pep249 import Connection
+from aws_advanced_python_wrapper.plugin import Plugin
+from aws_advanced_python_wrapper.plugin_service import PluginService
+from aws_advanced_python_wrapper.utils.log import Logger
+from aws_advanced_python_wrapper.utils.messages import Messages
+from aws_advanced_python_wrapper.utils.properties import Properties
+from aws_advanced_python_wrapper.utils.rdsutils import RdsUtils
+
+logger = Logger(__name__)
+
+
+class BlueGreenIntervalRate(Enum):
+    BASELINE = auto()
+    INCREASED = auto()
+    HIGH = auto()
+
+
+class BlueGreenPhase(Enum):
+    NOT_CREATED = (0, False)
+    CREATED = (1, False)
+    PREPARATION = (2, True)  # nodes are accessible
+    IN_PROGRESS = (3, True)  # active phase; nodes are not accessible
+    POST = (4, True)  # nodes are accessible; some changes are still in progress
+    COMPLETED = (5, True)  # all changes are completed
+
+    def __new__(cls, value: int, is_switchover_active_or_completed: bool):
+        obj = object.__new__(cls)
+        obj._value_ = (value, is_switchover_active_or_completed)
+        return obj
+
+    @property
+    def phase_value(self) -> int:
+        return self.value[0]
+
+    @property
+    def is_switchover_active_or_completed(self) -> bool:
+        return self.value[1]
+
+    @staticmethod
+    def parse_phase(phase_str: Optional[str]) -> BlueGreenPhase:
+        if not phase_str:
+            return BlueGreenPhase.NOT_CREATED
+
+        match phase_str.upper():
+            case "AVAILABLE":
+                return BlueGreenPhase.CREATED
+            case "SWITCHOVER_INITIATED":
+                return BlueGreenPhase.PREPARATION
+            case "SWITCHOVER_IN_PROGRESS":
+                return BlueGreenPhase.IN_PROGRESS
+            case "SWITCHOVER_IN_POST_PROCESSING":
+                return BlueGreenPhase.POST
+            case "SWITCHOVER_COMPLETED":
+                return BlueGreenPhase.COMPLETED
+            case _:
+                raise ValueError(Messages.get_formatted("BlueGreenPhase.UnknownStatus", phase_str))
+
+
+class BlueGreenRole(Enum):
+    SOURCE = 0
+    TARGET = 1
+
+    @staticmethod
+    def parse_role(role_str: str, version: str) -> BlueGreenRole:
+        if version != "1.0":
+            raise ValueError(Messages.get_formatted("BlueGreenRole.UnknownVersion", version))
+
+        match role_str:
+            case "BLUE_GREEN_DEPLOYMENT_SOURCE":
+                return BlueGreenRole.SOURCE
+            case "BLUE_GREEN_DEPLOYMENT_TARGET":
+                return BlueGreenRole.TARGET
+            case _:
+                raise ValueError(Messages.get_formatted("BlueGreenRole.UnknownRole", role_str))
+
+
+class BlueGreenStatus:
+    def __init__(self,
+                 bg_id: str,
+                 phase: BlueGreenPhase,
+                 connect_routings: Tuple[ConnectRouting, ...] = (),
+                 execute_routings: Tuple[ExecuteRouting, ...] = (),
+                 role_by_endpoint: MappingProxyType[str, BlueGreenRole] = MappingProxyType({})
+                 ):
+        self.bg_id = bg_id
+        self.phase = phase
+        self.connect_routings = tuple(connect_routings)
+        self.execute_routings = tuple(execute_routings)
+        self.role_by_endpoint = MappingProxyType(role_by_endpoint)
+
+    def get_role(self, host_info: HostInfo) -> Optional[BlueGreenRole]:
+        return self.role_by_endpoint.get(host_info.host.lower())
+
+    def __str__(self) -> str:
+        connect_routings_str = ',\n    '.join(str(cr) for cr in self.connect_routings)
+        execute_routings_str = ',\n    '.join(str(er) for er in self.execute_routings)
+        role_mappings = ',\n    '.join(f"{endpoint}: {role}" for endpoint, role in self.role_by_endpoint.items())
+
+        return (f"{self.__class__.__name__}(\n"
+                f"  id='{self.bg_id}',\n"
+                f"  phase={self.phase},\n"
+                f"  connect_routings=[\n"
+                f"    {connect_routings_str}\n"
+                f"  ],\n"
+                f"  execute_routings=[\n"
+                f"    {execute_routings_str}\n"
+                f"  ],\n"
+                f"  role_by_endpoint={{\n"
+                f"    {role_mappings}\n"
+                f"  }}\n"
+                f")")
+
+
+@dataclass
+class BlueGreenInterimStatus:
+    phase: BlueGreenPhase
+    version: str
+    port: int
+    start_topology: Tuple[HostInfo, ...]
+    start_ip_addresses_by_host_map: Dict[str, Optional[str]]
+    current_topology: Tuple[HostInfo, ...]
+    current_ip_addresses_by_host_map: Dict[str, Optional[str]]
+    host_names: Set[str]
+    all_start_topology_ip_changed: bool
+    all_start_topology_endpoints_removed: bool
+    all_topology_changed: bool
+
+    def get_custom_hashcode(self) -> int:
+        result: int = self.get_value_hash(1, "" if self.phase is None else str(self.phase))
+        result = self.get_value_hash(result, str(self.version))
+        result = self.get_value_hash(result, str(self.port))
+        result = self.get_value_hash(result, str(self.all_start_topology_ip_changed))
+        result = self.get_value_hash(result, str(self.all_start_topology_endpoints_removed))
+        result = self.get_value_hash(result, str(self.all_topology_changed))
+        result = self.get_value_hash(result, "" if self.host_names is None else ",".join(sorted(self.host_names)))
+        result = self.get_host_tuple_hash(result, self.start_topology)
+        result = self.get_host_tuple_hash(result, self.current_topology)
+        result = self.get_ip_dict_hash(result, self.start_ip_addresses_by_host_map)
+        result = self.get_ip_dict_hash(result, self.current_ip_addresses_by_host_map)
+        return result
+
+    def get_host_tuple_hash(self, current_hash: int, host_tuple: Optional[Tuple[HostInfo, ...]]) -> int:
+        if host_tuple is None or len(host_tuple) == 0:
+            tuple_str = ""
+        else:
+            # HostInfo.role is an enum, so it must be converted before concatenation.
+            tuple_str = ",".join(sorted(x.url + str(x.role) for x in host_tuple))
+
+        return self.get_value_hash(current_hash, tuple_str)
+
+    def get_ip_dict_hash(self, current_hash: int, ip_dict: Optional[Dict[str, Optional[str]]]) -> int:
+        if ip_dict is None or len(ip_dict) == 0:
+            dict_str = ""
+        else:
+            dict_str = ",".join(sorted(f"{key}{str(value)}" for key, value in ip_dict.items()))
+
+        return self.get_value_hash(current_hash, dict_str)
+
+    def get_value_hash(self, current_hash: int, val: Optional[str]) -> int:
+        return current_hash * 31 + hash("" if val is None else val)
+
+    def __str__(self):
+        host_names_str = ',\n    '.join(self.host_names)
+        start_topology_str = ',\n    '.join(str(h) for h in self.start_topology)
+        start_addresses_by_host_str = ',\n    '.join(
+            f"{k}: {v}" for k, v in self.start_ip_addresses_by_host_map.items()
+        )
+        current_topology_str = ',\n    '.join(str(h) for h in self.current_topology)
+        current_addresses_by_host_str = ',\n    '.join(
+            f"{k}: {v}" for k, v in self.current_ip_addresses_by_host_map.items()
+        )
+
+        return (f"{self.__class__.__name__}(\n"
+                f"  phase={self.phase},\n"
+                f"  version={self.version},\n"
+                f"  port={self.port},\n"
+                f"  host_names=[\n"
+                f"    {host_names_str}\n"
+                f"  ],\n"
+                f"  start_topology=[\n"
+                f"    {start_topology_str}\n"
+                f"  ],\n"
+                f"  start_ip_addresses_by_host_map={{\n"
+                f"    {start_addresses_by_host_str}\n"
+                f"  }},\n"
+                f"  current_topology=[\n"
+                f"    {current_topology_str}\n"
+                f"  ],\n"
+                f"  current_ip_addresses_by_host_map={{\n"
+                f"    {current_addresses_by_host_str}\n"
+                f"  }},\n"
+                f"  all_start_topology_ip_changed={self.all_start_topology_ip_changed},\n"
+                f"  all_start_topology_endpoints_removed={self.all_start_topology_endpoints_removed},\n"
+                f"  all_topology_changed={self.all_topology_changed}\n"
+                f")")
+
+
+class ConnectRouting(ABC):
+    @abstractmethod
+    def is_match(self, host_info: HostInfo, role: BlueGreenRole) -> bool:
+        ...
+
+    @abstractmethod
+    def apply(self,
+              plugin: Plugin,
+              host_info: HostInfo,
+              props: Properties,
+              is_initial_connection: bool,
+              connect_func: Callable,
+              plugin_service: PluginService) -> Connection:
+        ...
+
+
+class ExecuteRouting(ABC):
+    @abstractmethod
+    def is_match(self, host_info: HostInfo, role: BlueGreenRole) -> bool:
+        ...
+ + @abstractmethod + def apply(self, + plugin: Plugin, + plugin_service: PluginService, + props: Properties, + target: type, + method_name: str, + execute_func: Callable, + *args: Any, + **kwargs: Any) -> Optional[Any]: + ... + + +class BaseRouting: + _MIN_SLEEP_MS = 50 + + def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): + self._cv = Condition() + self._endpoint = endpoint # host and optionally port as well + self._bg_role = bg_role + + def delay(self, delay_ms: int, bg_status: BlueGreenStatus, plugin_service: PluginService, bg_id: str): + end_time = time.time() + (delay_ms / 1_000) + min_delay_ms = min(delay_ms, BaseRouting._MIN_SLEEP_MS) + + if bg_status is None: + time.sleep(delay_ms / 1_000) + return + + while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time: + with self._cv: + self._cv.wait(min_delay_ms / 1_000) + + def is_match(self, host_info: HostInfo, bg_role: BlueGreenRole) -> bool: + if self._endpoint is None: + return self._bg_role is None or self._bg_role == bg_role + + if host_info is None: + return False + + return self._endpoint == host_info.url.lower() and (self._bg_role is None or self._bg_role == bg_role) + + def __str__(self): + endpoint_str = "None" if self._endpoint is None else f"'{self._endpoint}'" + return f"{self.__class__.__name__}(endpoint={endpoint_str}, bg_role={self._bg_role})" + + +class PassThroughConnectRouting(BaseRouting, ConnectRouting): + def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): + super().__init__(endpoint, bg_role) + + def apply(self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Connection: + return connect_func() + + +class RejectConnectRouting(BaseRouting, ConnectRouting): + def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): + super().__init__(endpoint, bg_role) + + def apply(self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Connection: + raise AwsWrapperError(Messages.get("RejectConnectRouting.InProgressCantConnect")) + + +class SubstituteConnectRouting(BaseRouting, ConnectRouting): + _rds_utils: ClassVar[RdsUtils] = RdsUtils() + + def __init__(self, + endpoint: Optional[str], + bg_role: Optional[BlueGreenRole], + substitute_host_info: HostInfo, + iam_hosts: Optional[Tuple[HostInfo, ...]], + on_iam_connect_func: Optional[Callable]): + super().__init__(endpoint, bg_role) + self._substitute_host_info = substitute_host_info + self._iam_hosts = iam_hosts + self._on_iam_connect_func = on_iam_connect_func + + def apply(self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Connection: + ... 
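+        # Editor's note: apply() is still a stub in this WIP commit. Judging from the
+        # routing's fields and the SubstituteConnectRouting messages added below, it is
+        # expected to connect to self._substitute_host_info in place of the requested
+        # host, to require an IAM host parameter when connecting by IP address with IAM
+        # authentication (trying self._iam_hosts and notifying self._on_iam_connect_func),
+        # and to reject the attempt otherwise.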
+
+
+class PassThroughExecuteRouting(BaseRouting, ExecuteRouting):
+    def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]):
+        super().__init__(endpoint, bg_role)
+
+    def apply(self,
+              plugin: Plugin,
+              plugin_service: PluginService,
+              props: Properties,
+              target: type,
+              method_name: str,
+              execute_func: Callable,
+              *args: Any,
+              **kwargs: Any) -> Optional[Any]:
+        return execute_func()
diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py
index 3c108284..2f9806b6 100644
--- a/aws_advanced_python_wrapper/plugin_service.py
+++ b/aws_advanced_python_wrapper/plugin_service.py
@@ -14,7 +14,7 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, ClassVar, List, Type
+from typing import TYPE_CHECKING, ClassVar, List, Type, TypeVar, cast

 from aws_advanced_python_wrapper.aurora_initial_connection_strategy_plugin import \
     AuroraInitialConnectionStrategyPluginFactory
@@ -111,6 +111,8 @@ def plugin_manager(self, value):
         self._plugin_manager = value


+T = TypeVar('T')
+
 class PluginService(ExceptionHandler, Protocol):
     @property
     @abstractmethod
@@ -276,9 +278,19 @@ def get_connection_provider_manager(self) -> ConnectionProviderManager:
     def get_telemetry_factory(self) -> TelemetryFactory:
         ...

+    @abstractmethod
+    def set_status(self, clazz: Type[T], status: Optional[T], key: str):
+        ...
+
+    @abstractmethod
+    def get_status(self, clazz: Type[T], key: str) -> T:
+        ...
+

 class PluginServiceImpl(PluginService, HostListProviderService, CanReleaseResources):
+    _STATUS_CACHE_EXPIRATION_NANO = 60 * 60 * 1_000_000_000  # one hour
     _host_availability_expiring_cache: CacheMap[str, HostAvailability] = CacheMap()
+    _status_cache: ClassVar[CacheMap[str, Any]] = CacheMap()

     _executor: ClassVar[Executor] = ThreadPoolExecutor(thread_name_prefix="PluginServiceImplExecutor")

@@ -643,6 +655,20 @@ def release_resources(self):
             if host_list_provider is not None and isinstance(host_list_provider, CanReleaseResources):
                 host_list_provider.release_resources()

+    def set_status(self, clazz: Type[T], status: Optional[T], key: str):
+        cache_key = self._get_status_cache_key(clazz, key)
+        if status is None:
+            self._status_cache.remove(cache_key)
+        else:
+            self._status_cache.put(cache_key, status, PluginServiceImpl._STATUS_CACHE_EXPIRATION_NANO)
+
+    def _get_status_cache_key(self, clazz: Type[T], key: str) -> str:
+        key_str = "" if key is None else key.strip().lower()
+        return f"{key_str}::{clazz.__name__}"
+
+    def get_status(self, clazz: Type[T], key: str) -> T:
+        return cast(clazz, PluginServiceImpl._status_cache.get(self._get_status_cache_key(clazz, key)))
+

 class PluginManager(CanReleaseResources):
     _ALL_METHODS: str = "*"
diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
index 3c0b341e..56faa6ee 100644
--- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
+++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
@@ -40,6 +40,41 @@ AwsSecretsManagerPlugin.UnhandledException=[AwsSecretsManagerPlugin] Unhandled e
 AwsWrapperConnection.ConnectionNotOpen=[AwsWrapperConnection] Attempted to establish an initial connection, but the connection returned by the connect call evaluated to None.
 AwsWrapperConnection.InitialHostInfoNone=[AwsWrapperConnection] The initial connection host info unexpectedly evaluated to None after initializing the host list provider.

+BlueGreenPhase.UnknownStatus=[BlueGreenPhase] Unknown blue/green status '{}'.
+
+BlueGreenRole.UnknownRole=[BlueGreenRole] Unknown blue/green role '{}'.
+BlueGreenRole.UnknownVersion=[BlueGreenRole] Unknown blue/green version '{}'.
+
+BlueGreenStatusMonitor.CreateHostListProvider=[BlueGreenStatusMonitor] [{}] Creating a new HostListProvider, clusterId: {}.
+BlueGreenStatusMonitor.Exception=[BlueGreenStatusMonitor] [{}] currentPhase: {}. Exception while querying for blue/green status.
+BlueGreenStatusMonitor.HostSpecNull=[BlueGreenStatusMonitor] Unable to initialize HostListProvider since the connection host information is None.
+BlueGreenStatusMonitor.Interrupted=[BlueGreenStatusMonitor] [{}] Interrupted.
+BlueGreenStatusMonitor.MonitoringUnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception while monitoring blue/green status.
+BlueGreenStatusMonitor.NoEntriesInStatusTable=[BlueGreenStatusMonitor] [{}] No entries in status table.
+BlueGreenStatusMonitor.OpenedConnection=[BlueGreenStatusMonitor] [{}] Opened monitoring connection to {}.
+BlueGreenStatusMonitor.OpenedConnectionWithIp=[BlueGreenStatusMonitor] [{}] Opened monitoring connection (IP) to {}.
+BlueGreenStatusMonitor.OpeningConnection=[BlueGreenStatusMonitor] [{}] Opening monitoring connection to {}.
+BlueGreenStatusMonitor.OpeningConnectionWithIp=[BlueGreenStatusMonitor] [{}] Opening monitoring connection (IP) to {}.
+BlueGreenStatusMonitor.StatusChanged=[BlueGreenStatusMonitor] [{}] Status changed to: {}
+BlueGreenStatusMonitor.StatusNotAvailable=[BlueGreenStatusMonitor] [{}] (status not available) currentPhase: {}
+BlueGreenStatusMonitor.ThreadCompleted=[BlueGreenStatusMonitor] [{}] Blue/green status monitoring thread is completed.
+BlueGreenStatusMonitor.UnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception.
+BlueGreenStatusMonitor.UnhandledSqlException=[BlueGreenStatusMonitor] [{}] Unhandled SQLException.
+BlueGreenStatusMonitor.UsesVersion=[BlueGreenStatusMonitor] [{}] Blue/Green deployment uses version '{}', which the driver doesn't support. Version '{}' will be used instead.
+
+BlueGreenStatusProvider.BlueDnsCompleted=[BlueGreenStatusProvider] [bgdId: '{}'] Blue DNS update completed.
+BlueGreenStatusProvider.GreenDnsRemoved=[BlueGreenStatusProvider] [bgdId: '{}'] Green DNS removed.
+BlueGreenStatusProvider.GreenNodeChangedName=[BlueGreenStatusProvider] Green node '{}' has changed its name to '{}'.
+BlueGreenStatusProvider.GreenTopologyChanged=[BlueGreenStatusProvider] [bgdId: '{}'] Green topology changed.
+BlueGreenStatusProvider.InterimStatus=[BlueGreenStatusProvider] [bgdId: '{}', role: {}] {}
+BlueGreenStatusProvider.ResetContext=[BlueGreenStatusProvider] Resetting context.
+BlueGreenStatusProvider.Rollback=[BlueGreenStatusProvider] [bgdId: '{}'] Blue/Green deployment is in rollback mode.
+BlueGreenStatusProvider.SwitchoverTimeout=[BlueGreenStatusProvider] Blue/Green switchover has timed out.
+BlueGreenStatusProvider.UnknownPhase=[BlueGreenStatusProvider] [bgdId: '{}'] Unknown BG phase '{}'.
+BlueGreenStatusProvider.UnsupportedDialect=[BlueGreenStatusProvider] [bgdId: '{}'] Blue/Green Deployments aren't supported by database dialect {}.
+
+CloseConnectionExecuteRouting.InProgressConnectionClosed=[CloseConnectionExecuteRouting] Connection has been closed because Blue/Green switchover is in progress.
+ conftest.ExceptionWhileObtainingInstanceIDs=[conftest] An exception was thrown while attempting to obtain the cluster's instance IDs: '{}' ConnectTimePlugin.ConnectTime=[ConnectTimePlugin] Connected in {} nanos. @@ -276,6 +311,8 @@ ReadWriteSplittingPlugin.SwitchedFromWriterToReader=[ReadWriteSplittingPlugin] S ReadWriteSplittingPlugin.UnavailableHostInfo=[ReadWriteSplittingPlugin] Current Host Info could not be found in plugin service. ReadWriteSplittingPlugin.UnsupportedHostInfoSelectorStrategy=[ReadWriteSplittingPlugin] Unsupported host selection strategy '{}' specified in plugin configuration parameter 'reader_host_selector_strategy'. Please visit the Read/Write Splitting Plugin documentation for all supported strategies. +RejectConnectRouting.InProgressCantConnect=[RejectConnectRouting] Blue/Green Deployment switchover is in progress. New connection can't be opened. + RoundRobinHostSelector.ClusterInfoNone=[RoundRobinHostSelector] The round robin cluster information cache should have an entry for the current cluster, but no entry was found. RoundRobinHostSelector.RoundRobinInvalidDefaultWeight=[RoundRobinHostSelector] The provided default weight value is not valid. Weight values must be an integer greater than or equal to 1. RoundRobinHostSelector.RoundRobinInvalidHostWeightPairs= [RoundRobinHostSelector] The provided host weight pairs have not been configured correctly. Please ensure the provided host weight pairs is a comma separated list of pairs, each pair in the format of :. Weight values must be an integer greater than or equal to the default weight value of 1. @@ -296,6 +333,17 @@ StaleDnsHelper.WriterInetAddress=[StaleDnsPlugin] Writer host address: {} StaleDnsPlugin.RequireDynamicProvider=[StaleDnsPlugin] A dynamic host list provider is required for the stale DNS plugin, but the detected host list was a static provider. +SubstituteConnectRouting.InProgressCantOpenConnection=[SubstituteConnectRouting] Blue/Green Deployment switchover is in progress. Can't establish connection to '{}'. +SubstituteConnectRouting.RequireIamHost=[SubstituteConnectRouting] Connecting with IP address when IAM authentication is enabled requires an 'iamHost' parameter. + +SuspendConnectRouting.InProgressHoldConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. +SuspendConnectRouting.InProgressTryConnectLater=[SuspendConnectRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try to connect again later. +SuspendConnectRouting.SwitchoverCompleteContinueWithConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is completed. Continue with connect call. The call was held for {} ms. + +SuspendExecuteRouting.InProgressHoldMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is in progress. Hold '{}' call until switchover is completed. +SuspendExecuteRouting.StillInProgressTryMethodLater=[SuspendExecuteRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try '{}' again later. +SuspendExecuteRouting.SwitchoverCompletedContinueWithMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is completed. Continue with '{}' call. The call was held for {} ms. + Testing.CantParse=[Testing] Can't parse {}. Testing.DisabledConnectivity=[Testing] Disabled connectivity to {}. Testing.EnabledConnectivity=[Testing] Enabled connectivity to {}. 
diff --git a/tests/unit/test_blue_green_plugin.py b/tests/unit/test_blue_green_plugin.py new file mode 100644 index 00000000..e42f31bc --- /dev/null +++ b/tests/unit/test_blue_green_plugin.py @@ -0,0 +1,51 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from types import MappingProxyType + +from aws_advanced_python_wrapper.blue_green_plugin import PassThroughConnectRouting, BlueGreenRole, \ + PassThroughExecuteRouting, BlueGreenStatus, BlueGreenPhase, BlueGreenInterimStatus +from aws_advanced_python_wrapper.hostinfo import HostInfo + +# TODO: remove unnecessary tests +def test_status_str(): + connect_routing1 = PassThroughConnectRouting(None, BlueGreenRole.SOURCE) + connect_routing2 = PassThroughConnectRouting("localhost:5432", BlueGreenRole.SOURCE) + connect_routings = (connect_routing1, connect_routing2) + execute_routing1 = PassThroughExecuteRouting(None, BlueGreenRole.SOURCE) + execute_routing2 = PassThroughExecuteRouting("localhost:5432", BlueGreenRole.SOURCE) + execute_routings = (execute_routing1, execute_routing2) + + role_by_endpoint = MappingProxyType({"localhost-1": BlueGreenRole.SOURCE, "localhost-2": BlueGreenRole.TARGET}) + status = ( + BlueGreenStatus("asdf", BlueGreenPhase.PREPARATION, connect_routings, execute_routings, role_by_endpoint)) + print(f"\n{status}") + +def test_interim_status_str(): + status = BlueGreenInterimStatus( + BlueGreenPhase.CREATED, + "1.0", + 5432, + (HostInfo("instance-1"), HostInfo("instance-2")), + {"instance-1": "1.1.1.1", "instance-2": None}, + (HostInfo("instance-1"), HostInfo("instance-2")), + {"instance-1": "1.1.1.1", "instance-2": None}, + {"instance-1", "instance-2"}, + True, + True, + False + ) + + print(f"\n{status}") + From 51dd60d96458aa278f14422fd1bfeb4aeb08e445 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 5 Jun 2025 14:16:48 -0700 Subject: [PATCH 02/41] Add plugin_to_skip arg to connect/force_connect, is_plugin_in_use --- .../aurora_connection_tracker_plugin.py | 13 -- ...rora_initial_connection_strategy_plugin.py | 22 ++-- .../failover_plugin.py | 25 +--- .../fastest_response_strategy_plugin.py | 19 --- .../host_monitoring_plugin.py | 13 -- aws_advanced_python_wrapper/plugin_service.py | 78 ++++++++---- .../read_write_splitting_plugin.py | 17 +-- .../reader_failover_handler.py | 2 +- .../stale_dns_plugin.py | 12 -- .../writer_failover_handler.py | 7 +- tests/unit/test_plugin_manager.py | 113 ++++++++++++++++-- .../unit/test_read_write_splitting_plugin.py | 4 +- 12 files changed, 179 insertions(+), 146 deletions(-) diff --git a/aws_advanced_python_wrapper/aurora_connection_tracker_plugin.py b/aws_advanced_python_wrapper/aurora_connection_tracker_plugin.py index 699f0dcf..2e0e45ab 100644 --- a/aws_advanced_python_wrapper/aurora_connection_tracker_plugin.py +++ b/aws_advanced_python_wrapper/aurora_connection_tracker_plugin.py @@ -173,19 +173,6 @@ def connect( props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection: - return 
self._connect(host_info, connect_func) - - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self._connect(host_info, force_connect_func) - - def _connect(self, host_info: HostInfo, connect_func: Callable): conn = connect_func() if conn: diff --git a/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py b/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py index 0d1aad50..1cedf303 100644 --- a/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py +++ b/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py @@ -35,9 +35,7 @@ class AuroraInitialConnectionStrategyPlugin(Plugin): - _SUBSCRIBED_METHODS: Set[str] = {"init_host_provider", - "connect", - "force_connect"} + _SUBSCRIBED_METHODS: Set[str] = {"init_host_provider", "connect"} _host_list_provider_service: Optional[HostListProviderService] = None @@ -52,29 +50,24 @@ def __init__(self, plugin_service: PluginService): def connect(self, target_driver_func: Callable, driver_dialect: DriverDialect, host_info: HostInfo, props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, connect_func) - - def force_connect(self, target_driver_func: Callable, driver_dialect: DriverDialect, host_info: HostInfo, props: Properties, - is_initial_connection: bool, force_connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, force_connect_func) - - def _connect(self, host_info: HostInfo, props: Properties, is_initial_connection: bool, connect_func: Callable): - type: RdsUrlType = self._rds_utils.identify_rds_type(host_info.host) - if not type.is_rds_cluster: + url_type: RdsUrlType = self._rds_utils.identify_rds_type(host_info.host) + if not url_type.is_rds_cluster: return connect_func() - if type == RdsUrlType.RDS_WRITER_CLUSTER: + if url_type == RdsUrlType.RDS_WRITER_CLUSTER: writer_candidate_conn: Optional[Connection] = self._get_verified_writer_connection(props, is_initial_connection, connect_func) if writer_candidate_conn is None: return connect_func() return writer_candidate_conn - if type == RdsUrlType.RDS_READER_CLUSTER: + if url_type == RdsUrlType.RDS_READER_CLUSTER: reader_candidate_conn: Optional[Connection] = self._get_verified_reader_connection(props, is_initial_connection, connect_func) if reader_candidate_conn is None: return connect_func() return reader_candidate_conn + return connect_func() + def _get_verified_writer_connection(self, props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection | None: retry_delay_ms: int = WrapperProperties.OPEN_CONNECTION_RETRY_INTERVAL_MS.get_int(props) end_time_nano = perf_counter_ns() + (WrapperProperties.OPEN_CONNECTION_RETRY_INTERVAL_MS.get_int(props) * 1000000) @@ -84,7 +77,6 @@ def _get_verified_writer_connection(self, props: Properties, is_initial_connecti while perf_counter_ns() < end_time_nano: writer_candidate_conn = None - writer_candidate = None try: writer_candidate = self._get_writer() diff --git a/aws_advanced_python_wrapper/failover_plugin.py b/aws_advanced_python_wrapper/failover_plugin.py index 497a50bc..b959372b 100644 --- a/aws_advanced_python_wrapper/failover_plugin.py +++ b/aws_advanced_python_wrapper/failover_plugin.py @@ -59,7 +59,6 @@ class FailoverPlugin(Plugin): """ 
_SUBSCRIBED_METHODS: Set[str] = {"init_host_provider", "connect", - "force_connect", "notify_host_list_changed"} _METHODS_REQUIRE_UPDATED_TOPOLOGY: Set[str] = { @@ -205,28 +204,8 @@ def connect( props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, connect_func) - - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, force_connect_func) - - def _connect( - self, - host: HostInfo, - properties: Properties, - is_initial_connection: bool, - connect_func: Callable) -> Connection: - conn: Connection = self._stale_dns_helper.get_verified_connection(is_initial_connection, - self._host_list_provider_service, host, - properties, - connect_func) + conn: Connection = self._stale_dns_helper.get_verified_connection( + is_initial_connection, self._host_list_provider_service, host_info, props, connect_func) if is_initial_connection: self._plugin_service.refresh_host_list(conn) diff --git a/aws_advanced_python_wrapper/fastest_response_strategy_plugin.py b/aws_advanced_python_wrapper/fastest_response_strategy_plugin.py index 4f8453f1..26c915cc 100644 --- a/aws_advanced_python_wrapper/fastest_response_strategy_plugin.py +++ b/aws_advanced_python_wrapper/fastest_response_strategy_plugin.py @@ -51,7 +51,6 @@ class FastestResponseStrategyPlugin(Plugin): _FASTEST_RESPONSE_STRATEGY_NAME = "fastest_response" _SUBSCRIBED_METHODS: Set[str] = {"accepts_strategy", "connect", - "force_connect", "get_host_info_by_strategy", "notify_host_list_changed"} @@ -77,24 +76,6 @@ def connect( props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, connect_func) - - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self._connect(host_info, props, is_initial_connection, force_connect_func) - - def _connect( - self, - host: HostInfo, - properties: Properties, - is_initial_connection: bool, - connect_func: Callable) -> Connection: conn = connect_func() if is_initial_connection: diff --git a/aws_advanced_python_wrapper/host_monitoring_plugin.py b/aws_advanced_python_wrapper/host_monitoring_plugin.py index 58b00ee8..faa76570 100644 --- a/aws_advanced_python_wrapper/host_monitoring_plugin.py +++ b/aws_advanced_python_wrapper/host_monitoring_plugin.py @@ -86,19 +86,6 @@ def connect( props: Properties, is_initial_connection: bool, connect_func: Callable) -> Connection: - return self._connect(host_info, connect_func) - - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self._connect(host_info, force_connect_func) - - def _connect(self, host_info: HostInfo, connect_func: Callable) -> Connection: conn = connect_func() if conn: rds_type = self._rds_utils.identify_rds_type(host_info.host) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 3c108284..21363248 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ 
b/aws_advanced_python_wrapper/plugin_service.py @@ -34,7 +34,6 @@ from aws_advanced_python_wrapper.driver_dialect_manager import DriverDialectManager from aws_advanced_python_wrapper.pep249 import Connection from aws_advanced_python_wrapper.plugin import Plugin, PluginFactory - from threading import Event from abc import abstractmethod from concurrent.futures import Executor, ThreadPoolExecutor, TimeoutError @@ -236,7 +235,7 @@ def refresh_host_list(self, connection: Optional[Connection] = None): def force_refresh_host_list(self, connection: Optional[Connection] = None): ... - def connect(self, host_info: HostInfo, props: Properties) -> Connection: + def connect(self, host_info: HostInfo, props: Properties, plugin_to_skip: Optional[Plugin] = None) -> Connection: """ Establishes a connection to the given host using the given driver protocol and properties. If a non-default :py:class`ConnectionProvider` has been set with :py:method:`ConnectionProviderManager.set_connection_provider`, @@ -245,11 +244,12 @@ def connect(self, host_info: HostInfo, props: Properties) -> Connection: :param host_info: the host details for the desired connection. :param props: the connection properties. + :param plugin_to_skip: the calling plugin, which will be skipped in the plugin chain when trying to connect. :return: a :py:class`Connection` to the requested host. """ ... - def force_connect(self, host_info: HostInfo, props: Properties, timeout_event: Optional[Event]) -> Connection: + def force_connect(self, host_info: HostInfo, props: Properties, plugin_to_skip: Optional[Plugin] = None) -> Connection: """ Establishes a connection to the given host using the given driver protocol and properties. This call differs from connect in that the default :py:class`DriverConnectionProvider` will be used to establish the connection even if @@ -257,6 +257,7 @@ def force_connect(self, host_info: HostInfo, props: Properties, timeout_event: O :param host_info: the host details for the desired connection. :param props: the connection properties. + :param plugin_to_skip: the calling plugin, which will be skipped in the plugin chain when trying to connect. :return: a :py:class`Connection` to the requested host. """ ... @@ -276,6 +277,10 @@ def get_connection_provider_manager(self) -> ConnectionProviderManager: def get_telemetry_factory(self) -> TelemetryFactory: ... + @abstractmethod + def is_plugin_in_use(self, plugin_class: Type[Plugin]): + ... 
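+        # True when the active plugin chain contains an instance of the given plugin
+        # class; this lets a plugin adapt its behavior to which other plugins (for
+        # example, IAM authentication) are enabled.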
+

 class PluginServiceImpl(PluginService, HostListProviderService, CanReleaseResources):
     _host_availability_expiring_cache: CacheMap[str, HostAvailability] = CacheMap()
@@ -504,15 +509,16 @@ def force_refresh_host_list(self, connection: Optional[Connection] = None):
         self._update_host_availability(updated_host_list)
         self._update_hosts(updated_host_list)

-    def connect(self, host_info: HostInfo, props: Properties) -> Connection:
+    def connect(self, host_info: HostInfo, props: Properties, plugin_to_skip: Optional[Plugin] = None) -> Connection:
         plugin_manager: PluginManager = self._container.plugin_manager
         return plugin_manager.connect(
-            self._target_func, self._driver_dialect, host_info, props, self.current_connection is None)
+            self._target_func, self._driver_dialect, host_info, props, self.current_connection is None, plugin_to_skip)

-    def force_connect(self, host_info: HostInfo, props: Properties, timeout_event: Optional[Event]) -> Connection:
+    def force_connect(
+            self, host_info: HostInfo, props: Properties, plugin_to_skip: Optional[Plugin] = None) -> Connection:
         plugin_manager: PluginManager = self._container.plugin_manager
         return plugin_manager.force_connect(
-            self._target_func, self._driver_dialect, host_info, props, self.current_connection is None)
+            self._target_func, self._driver_dialect, host_info, props, self.current_connection is None, plugin_to_skip)

     def set_availability(self, host_aliases: FrozenSet[str], availability: HostAvailability):
         ...
@@ -630,6 +636,9 @@ def _compare(self, host_a: HostInfo, host_b: HostInfo) -> Set[HostEvent]:

         return changes

+    def is_plugin_in_use(self, plugin_class: Type[Plugin]) -> bool:
+        return self._container.plugin_manager.is_plugin_in_use(plugin_class)
+
     def release_resources(self):
         try:
             if self.current_connection is not None and not self.driver_dialect.is_closed(
@@ -817,7 +826,8 @@ def execute(self, target: object, method_name: str, target_driver_func: Callable
                 method_name,
                 # next_plugin_func is defined later in make_pipeline
                 lambda plugin, next_plugin_func: plugin.execute(target, method_name, next_plugin_func, *args, **kwargs),
-                target_driver_func)
+                target_driver_func,
+                 None)

             context.set_success(True)

@@ -835,32 +845,41 @@ def _execute_with_telemetry(self, plugin_name: str, func: Callable):
         finally:
             context.close_context()

-    def _execute_with_subscribed_plugins(self, method_name: str, plugin_func: Callable, target_driver_func: Callable):
+    def _execute_with_subscribed_plugins(
+            self,
+            method_name: str,
+            plugin_func: Callable,
+            target_driver_func: Callable,
+            plugin_to_skip: Optional[Plugin] = None):
+        if plugin_to_skip is not None:
+            # A pipeline that skips a plugin is specific to the caller, so it is built
+            # fresh rather than cached under the method name, where it would otherwise
+            # leak into later calls that should use the full pipeline.
+            return self._make_pipeline(method_name, plugin_to_skip)(plugin_func, target_driver_func)
+
         pipeline_func: Optional[Callable] = self._function_cache.get(method_name)
         if pipeline_func is None:
-            pipeline_func = self._make_pipeline(method_name)
+            pipeline_func = self._make_pipeline(method_name, None)
             self._function_cache[method_name] = pipeline_func

         return pipeline_func(plugin_func, target_driver_func)

     # Builds the plugin pipeline function chain. The pipeline is built in a way that allows plugins to perform logic
     # both before and after the target driver function call.
- def _make_pipeline(self, method_name: str) -> Callable: + def _make_pipeline(self, method_name: str, plugin_to_skip: Optional[Plugin] = None) -> Callable: pipeline_func: Optional[Callable] = None num_plugins: int = len(self._plugins) # Build the pipeline starting at the end and working backwards for i in range(num_plugins - 1, -1, -1): plugin: Plugin = self._plugins[i] + if plugin_to_skip is not None and plugin_to_skip == plugin: + continue + subscribed_methods: Set[str] = plugin.subscribed_methods is_subscribed: bool = PluginManager._ALL_METHODS in subscribed_methods or method_name in subscribed_methods + if not is_subscribed: + continue - if is_subscribed: - if pipeline_func is None: - # Defines the call to DefaultPlugin, which is the last plugin in the pipeline - pipeline_func = self._create_base_pipeline_func(plugin) - else: - pipeline_func = self._extend_pipeline_func(plugin, pipeline_func) + if pipeline_func is None: + # Defines the call to DefaultPlugin, which is the last plugin in the pipeline + pipeline_func = self._create_base_pipeline_func(plugin) + else: + pipeline_func = self._extend_pipeline_func(plugin, pipeline_func) if pipeline_func is None: raise AwsWrapperError(Messages.get("PluginManager.PipelineNone")) @@ -887,7 +906,8 @@ def connect( driver_dialect: DriverDialect, host_info: Optional[HostInfo], props: Properties, - is_initial_connection: bool) -> Connection: + is_initial_connection: bool, + plugin_to_skip: Optional[Plugin] = None) -> Connection: context = self._telemetry_factory.open_telemetry_context("connect", TelemetryTraceLevel.NESTED) try: return self._execute_with_subscribed_plugins( @@ -895,7 +915,8 @@ def connect( lambda plugin, func: plugin.connect( target_func, driver_dialect, host_info, props, is_initial_connection, func), # The final connect action will be handled by the ConnectionProvider, so this lambda will not be called. - lambda: None) + lambda: None, + plugin_to_skip) finally: context.close_context() @@ -905,13 +926,15 @@ def force_connect( driver_dialect: DriverDialect, host_info: Optional[HostInfo], props: Properties, - is_initial_connection: bool) -> Connection: + is_initial_connection: bool, + plugin_to_skip: Optional[Plugin] = None) -> Connection: return self._execute_with_subscribed_plugins( PluginManager._FORCE_CONNECT_METHOD, lambda plugin, func: plugin.force_connect( target_func, driver_dialect, host_info, props, is_initial_connection, func), # The final connect action will be handled by the ConnectionProvider, so this lambda will not be called. 
- lambda: None) + lambda: None, + plugin_to_skip) def notify_connection_changed(self, changes: Set[ConnectionEvent]) -> OldConnectionSuggestedAction: old_conn_suggestions: Set[OldConnectionSuggestedAction] = set() @@ -980,10 +1003,21 @@ def init_host_provider(self, props: Properties, host_list_provider_service: Host return self._execute_with_subscribed_plugins( PluginManager._INIT_HOST_LIST_PROVIDER_METHOD, lambda plugin, func: plugin.init_host_provider(props, host_list_provider_service, func), - lambda: None) + lambda: None, + None) finally: context.close_context() + def is_plugin_in_use(self, plugin_class: Type[Plugin]) -> bool: + if not self._plugins: + return False + + for plugin in self._plugins: + if isinstance(plugin, plugin_class): + return True + + return False + def release_resources(self): """ Allows all connection plugins a chance to clean up any dangling resources diff --git a/aws_advanced_python_wrapper/read_write_splitting_plugin.py b/aws_advanced_python_wrapper/read_write_splitting_plugin.py index 7b00255f..98999aa8 100644 --- a/aws_advanced_python_wrapper/read_write_splitting_plugin.py +++ b/aws_advanced_python_wrapper/read_write_splitting_plugin.py @@ -91,19 +91,6 @@ def connect( Messages.get_formatted("ReadWriteSplittingPlugin.UnsupportedHostInfoSelectorStrategy", self._reader_selector_strategy)) - return self.connect_internal(is_initial_connection, connect_func) - - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self.connect_internal(is_initial_connection, force_connect_func) - - def connect_internal(self, is_initial_connection: bool, connect_func: Callable) -> Connection: current_conn = connect_func() if not is_initial_connection or self._host_list_provider_service.is_static_host_list_provider(): @@ -175,7 +162,7 @@ def _set_reader_connection(self, reader_conn: Connection, reader_host_info: Host logger.debug("ReadWriteSplittingPlugin.SetReaderConnection", reader_host_info.url) def _get_new_writer_connection(self, writer_host: HostInfo): - conn = self._plugin_service.connect(writer_host, self._properties) + conn = self._plugin_service.connect(writer_host, self._properties, self) provider = self._conn_provider_manager.get_connection_provider(writer_host, self._properties) self._is_writer_conn_from_internal_pool = (ReadWriteSplittingPlugin._POOL_PROVIDER_CLASS_NAME in str(type(provider))) self._set_writer_connection(conn, writer_host) @@ -303,7 +290,7 @@ def _initialize_reader_connection(self, hosts: Tuple[HostInfo, ...]): host = self._plugin_service.get_host_info_by_strategy(HostRole.READER, self._reader_selector_strategy) if host is not None: try: - conn = self._plugin_service.connect(host, self._properties) + conn = self._plugin_service.connect(host, self._properties, self) provider = self._conn_provider_manager.get_connection_provider(host, self._properties) self._is_reader_conn_from_internal_pool = (ReadWriteSplittingPlugin._POOL_PROVIDER_CLASS_NAME in str(type(provider))) reader_host = host diff --git a/aws_advanced_python_wrapper/reader_failover_handler.py b/aws_advanced_python_wrapper/reader_failover_handler.py index bd10f0f6..d0bc2978 100644 --- a/aws_advanced_python_wrapper/reader_failover_handler.py +++ b/aws_advanced_python_wrapper/reader_failover_handler.py @@ -194,7 +194,7 @@ def attempt_connection(self, host: HostInfo) -> ReaderFailoverResult: 
logger.debug("ReaderFailoverHandler.AttemptingReaderConnection", host.url, PropertiesUtils.mask_properties(props)) try: - conn: Connection = self._plugin_service.force_connect(host, props, self._timeout_event) + conn: Connection = self._plugin_service.force_connect(host, props) self._plugin_service.set_availability(host.all_aliases, HostAvailability.AVAILABLE) logger.debug("ReaderFailoverHandler.SuccessfulReaderConnection", host.url) diff --git a/aws_advanced_python_wrapper/stale_dns_plugin.py b/aws_advanced_python_wrapper/stale_dns_plugin.py index 6a3e815e..310ee69c 100644 --- a/aws_advanced_python_wrapper/stale_dns_plugin.py +++ b/aws_advanced_python_wrapper/stale_dns_plugin.py @@ -154,7 +154,6 @@ class StaleDnsPlugin(Plugin): _SUBSCRIBED_METHODS: Set[str] = {"init_host_provider", "connect", - "force_connect", "notify_host_list_changed"} def __init__(self, plugin_service: PluginService) -> None: @@ -178,17 +177,6 @@ def connect( return self._stale_dns_helper.get_verified_connection( is_initial_connection, self._host_list_provider_service, host_info, props, connect_func) - def force_connect( - self, - target_driver_func: Callable, - driver_dialect: DriverDialect, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - force_connect_func: Callable) -> Connection: - return self._stale_dns_helper.get_verified_connection( - is_initial_connection, self._host_list_provider_service, host_info, props, force_connect_func) - def execute(self, target: type, method_name: str, execute_func: Callable, *args: Any, **kwargs: Any) -> Any: try: self._plugin_service.refresh_host_list() diff --git a/aws_advanced_python_wrapper/writer_failover_handler.py b/aws_advanced_python_wrapper/writer_failover_handler.py index db1fe24d..51139d81 100644 --- a/aws_advanced_python_wrapper/writer_failover_handler.py +++ b/aws_advanced_python_wrapper/writer_failover_handler.py @@ -172,7 +172,7 @@ def reconnect_to_writer(self, initial_writer_host: HostInfo): if conn is not None: conn.close() - conn = self._plugin_service.force_connect(initial_writer_host, self._initial_connection_properties, self._timeout_event) + conn = self._plugin_service.force_connect(initial_writer_host, self._initial_connection_properties) self._plugin_service.force_refresh_host_list(conn) latest_topology = self._plugin_service.all_hosts @@ -311,9 +311,8 @@ def connect_to_writer(self, writer_candidate: Optional[HostInfo]) -> bool: try: # connect to new writer if writer_candidate is not None: - self._current_connection = self._plugin_service.force_connect(writer_candidate, - self._initial_connection_properties, - self._timeout_event) + self._current_connection = \ + self._plugin_service.force_connect(writer_candidate, self._initial_connection_properties) self._plugin_service.set_availability(writer_candidate.as_aliases(), HostAvailability.AVAILABLE) return True except Exception: diff --git a/tests/unit/test_plugin_manager.py b/tests/unit/test_plugin_manager.py index 9e380a65..aa58382c 100644 --- a/tests/unit/test_plugin_manager.py +++ b/tests/unit/test_plugin_manager.py @@ -16,6 +16,8 @@ from typing import TYPE_CHECKING +from tests.unit.test_fastest_response_strategy_plugin import plugin + if TYPE_CHECKING: from aws_advanced_python_wrapper.driver_dialect import DriverDialect from aws_advanced_python_wrapper.pep249 import Connection @@ -141,7 +143,7 @@ def test_unknown_profile(mocker, mock_telemetry_factory): PluginManager(mocker.MagicMock(), props, mock_telemetry_factory()) -def test_execute_call_a(mocker, mock_conn, container, 
mock_driver_dialect, mock_telemetry_factory): +def test_execute_call_a(mocker, mock_conn, container, mock_plugin_service, mock_driver_dialect, mock_telemetry_factory): calls = [] args = [10, "arg2", 3.33] plugins = [TestPluginOne(calls), TestPluginTwo(calls), TestPluginThree(calls)] @@ -157,7 +159,7 @@ def test_execute_call_a(mocker, mock_conn, container, mock_driver_dialect, mock_ make_pipeline_func = mocker.patch.object(manager, '_make_pipeline', wraps=manager._make_pipeline) result = manager.execute(mock_conn, "test_call_a", lambda: _target_call(calls), *args) - make_pipeline_func.assert_called_once_with("test_call_a") + make_pipeline_func.assert_called_once_with("test_call_a", None) assert result == "result_value" assert len(calls) == 7 assert calls[0] == "TestPluginOne:before execute" @@ -172,7 +174,7 @@ def test_execute_call_a(mocker, mock_conn, container, mock_driver_dialect, mock_ result = manager.execute(mock_conn, "test_call_a", lambda: _target_call(calls), *args) # The first execute call should cache the pipeline - make_pipeline_func.assert_called_once_with("test_call_a") + make_pipeline_func.assert_called_once_with("test_call_a", None) assert result == "result_value" assert len(calls) == 7 assert calls[0] == "TestPluginOne:before execute" @@ -189,7 +191,7 @@ def _target_call(calls: List[str]): return "result_value" -def test_execute_call_b(mocker, container, mock_driver_dialect, mock_telemetry_factory): +def test_execute_call_b(mocker, container, mock_driver_dialect, mock_telemetry_factory, mock_conn): calls = [] args = [10, "arg2", 3.33] plugins = [TestPluginOne(calls), TestPluginTwo(calls), TestPluginThree(calls)] @@ -212,7 +214,7 @@ def test_execute_call_b(mocker, container, mock_driver_dialect, mock_telemetry_f assert calls[4] == "TestPluginOne:after execute" -def test_execute_call_c(mocker, container, mock_driver_dialect, mock_telemetry_factory): +def test_execute_call_c(mocker, container, mock_driver_dialect, mock_telemetry_factory, mock_conn): calls = [] args = [10, "arg2", 3.33] plugins = [TestPluginOne(calls), TestPluginTwo(calls), TestPluginThree(calls)] @@ -233,7 +235,7 @@ def test_execute_call_c(mocker, container, mock_driver_dialect, mock_telemetry_f assert calls[2] == "TestPluginOne:after execute" -def test_execute_against_old_target(mocker, container, mock_driver_dialect, mock_telemetry_factory): +def test_execute_against_old_target(mocker, container, mock_driver_dialect, mock_telemetry_factory, mock_conn): mocker.patch.object(PluginManager, "__init__", lambda w, x, y, z: None) manager = PluginManager(mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) manager._container = container @@ -269,6 +271,87 @@ def test_connect(mocker, container, mock_conn, mock_driver_dialect, mock_telemet assert calls[3] == "TestPluginOne:after connect" +def test_connect__skip_plugin(mocker, container, mock_conn, mock_driver_dialect, mock_telemetry_factory): + calls = [] + + plugin1 = TestPluginOne(calls) + plugins = [plugin1, TestPluginTwo(calls), TestPluginThree(calls, mock_conn)] + + mocker.patch.object(PluginManager, "__init__", lambda w, x, y, z: None) + manager = PluginManager(mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + manager._plugins = plugins + manager._function_cache = {} + manager._telemetry_factory = mock_telemetry_factory + manager._container = container + + result = manager.connect(mocker.MagicMock(), mocker.MagicMock(), HostInfo("localhost"), Properties(), True, plugin1) + + assert result == mock_conn + assert len(calls) == 2 + assert calls[0] 
== "TestPluginThree:before connect" + assert calls[1] == "TestPluginThree:after connect" + + +def test_force_connect(mocker, container, mock_conn, mock_driver_dialect, mock_telemetry_factory): + calls = [] + + plugins = [TestPluginOne(calls), TestPluginTwo(calls), TestPluginThree(calls, mock_conn)] + + mocker.patch.object(PluginManager, "__init__", lambda w, x, y, z: None) + manager = PluginManager(mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + manager._plugins = plugins + manager._function_cache = {} + manager._telemetry_factory = mock_telemetry_factory + manager._container = container + + make_pipeline_func = mocker.patch.object(manager, '_make_pipeline', wraps=manager._make_pipeline) + # The first call to force_connect should generate the plugin pipeline and cache it + result = manager.force_connect(mocker.MagicMock(), mocker.MagicMock(), HostInfo("localhost"), Properties(), True) + + make_pipeline_func.assert_called_once_with("force_connect", None) + assert result == mock_conn + assert len(calls) == 4 + assert calls[0] == "TestPluginOne:before forceConnect" + assert calls[1] == "TestPluginThree:before forceConnect" + assert calls[2] == "TestPluginThree:after forceConnect" + assert calls[3] == "TestPluginOne:after forceConnect" + + calls.clear() + + result = manager.force_connect(mocker.MagicMock(), mocker.MagicMock(), HostInfo("localhost"), Properties(), True) + + # The second call should have used the cached plugin pipeline, so make_pipeline should not have been called again + make_pipeline_func.assert_called_once_with("force_connect", None) + assert result == mock_conn + assert len(calls) == 4 + assert calls[0] == "TestPluginOne:before forceConnect" + assert calls[1] == "TestPluginThree:before forceConnect" + assert calls[2] == "TestPluginThree:after forceConnect" + assert calls[3] == "TestPluginOne:after forceConnect" + + +def test_force_connect__cached(mocker, container, mock_conn, mock_driver_dialect, mock_telemetry_factory): + calls = [] + + plugins = [TestPluginOne(calls), TestPluginTwo(calls), TestPluginThree(calls, mock_conn)] + + mocker.patch.object(PluginManager, "__init__", lambda w, x, y, z: None) + manager = PluginManager(mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + manager._plugins = plugins + manager._function_cache = {} + manager._telemetry_factory = mock_telemetry_factory + manager._container = container + + result = manager.force_connect(mocker.MagicMock(), mocker.MagicMock(), HostInfo("localhost"), Properties(), True) + + assert result == mock_conn + assert len(calls) == 4 + assert calls[0] == "TestPluginOne:before forceConnect" + assert calls[1] == "TestPluginThree:before forceConnect" + assert calls[2] == "TestPluginThree:after forceConnect" + assert calls[3] == "TestPluginOne:after forceConnect" + + def test_exception_before_connect(mocker, container, mock_telemetry_factory): calls = [] plugins = \ @@ -407,6 +490,22 @@ def connect( self._calls.append(type(self).__name__ + ":after connect") return result + def force_connect( + self, + target_driver_func: Callable, + driver_dialect: DriverDialect, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable) -> Connection: + self._calls.append(type(self).__name__ + ":before forceConnect") + if self._connection is not None: + result = self._connection + else: + result = connect_func() + self._calls.append(type(self).__name__ + ":after forceConnect") + return result + def execute(self, target: object, method_name: str, execute_func: Callable, 
*args: Any, **kwargs: Any) -> Any: self._calls.append(type(self).__name__ + ":before execute") result = execute_func() @@ -446,7 +545,7 @@ class TestPluginThree(TestPlugin): @property def subscribed_methods(self) -> Set[str]: - return {"test_call_a", "connect", "notify_connection_changed", "notify_host_list_changed"} + return {"test_call_a", "connect", "force_connect", "notify_connection_changed", "notify_host_list_changed"} def notify_connection_changed(self, changes: Set[ConnectionEvent]) -> OldConnectionSuggestedAction: self._calls.append(type(self).__name__ + ":notify_connection_changed") diff --git a/tests/unit/test_read_write_splitting_plugin.py b/tests/unit/test_read_write_splitting_plugin.py index 64d456f1..e77f67e2 100644 --- a/tests/unit/test_read_write_splitting_plugin.py +++ b/tests/unit/test_read_write_splitting_plugin.py @@ -351,7 +351,7 @@ def get_host_role_side_effect(conn): def test_close_pooled_reader_connection_after_set_read_only(mocker, plugin_service_mock): - def connect_side_effect(host, props): + def connect_side_effect(host, props, plugin): if host in [reader_host1, reader_host2, reader_host3]: return reader_conn_mock elif host == writer_host: @@ -384,7 +384,7 @@ def connect_side_effect(host, props): def test_close_pooled_writer_connection_after_set_read_only(mocker, plugin_service_mock): - def connect_side_effect(host, props): + def connect_side_effect(host, props, plugin): if host in [reader_host1, reader_host2, reader_host3]: return reader_conn_mock elif host == writer_host: From 16bc9f0f7318fe644e0d56b4594e7b431aa0b5ee Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 5 Jun 2025 14:19:01 -0700 Subject: [PATCH 03/41] Add set_status, get_status --- aws_advanced_python_wrapper/plugin_service.py | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 21363248..3fd5eb08 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, List, Type +from typing import TYPE_CHECKING, ClassVar, List, Type, TypeVar, cast from aws_advanced_python_wrapper.aurora_initial_connection_strategy_plugin import \ AuroraInitialConnectionStrategyPluginFactory @@ -110,6 +110,8 @@ def plugin_manager(self, value): self._plugin_manager = value +T = TypeVar('T') + class PluginService(ExceptionHandler, Protocol): @property @abstractmethod @@ -281,9 +283,19 @@ def get_telemetry_factory(self) -> TelemetryFactory: def is_plugin_in_use(self, plugin_class: Type[Plugin]): ... + @abstractmethod + def set_status(self, clazz: Type[T], status: Optional[T], key: str): + ... + + @abstractmethod + def get_status(self, clazz: Type[T], key: str) -> T: + ... 
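+        # Statuses are shared across the process through a class-level cache:
+        # set_status(clazz, None, key) removes the entry, and keys are trimmed,
+        # lower-cased, and namespaced by the status class name (see
+        # _get_status_cache_key in PluginServiceImpl below).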
+
 class PluginServiceImpl(PluginService, HostListProviderService, CanReleaseResources):
+    _STATUS_CACHE_EXPIRATION_NANO = 60 * 60 * 1_000_000_000  # one hour
     _host_availability_expiring_cache: CacheMap[str, HostAvailability] = CacheMap()
+    _status_cache: ClassVar[CacheMap[str, Any]] = CacheMap()
 
     _executor: ClassVar[Executor] = ThreadPoolExecutor(thread_name_prefix="PluginServiceImplExecutor")
 
@@ -652,6 +664,20 @@ def release_resources(self):
         if host_list_provider is not None and isinstance(host_list_provider, CanReleaseResources):
             host_list_provider.release_resources()
 
+    def set_status(self, clazz: Type[T], status: Optional[T], key: str):
+        cache_key = self._get_status_cache_key(clazz, key)
+        if status is None:
+            self._status_cache.remove(cache_key)
+        else:
+            self._status_cache.put(cache_key, status, PluginServiceImpl._STATUS_CACHE_EXPIRATION_NANO)
+
+    def _get_status_cache_key(self, clazz: Type[T], key: str) -> str:
+        key_str = "" if key is None else key.strip().lower()
+        return f"{key_str}::{clazz.__name__}"
+
+    def get_status(self, clazz: Type[T], key: str) -> T:
+        return cast(clazz, PluginServiceImpl._status_cache.get(self._get_status_cache_key(clazz, key)))
+
 
 class PluginManager(CanReleaseResources):
     _ALL_METHODS: str = "*"
@@ -827,7 +853,7 @@ def execute(self, target: object, method_name: str, target_driver_func: Callable
                 # next_plugin_func is defined later in make_pipeline
                 lambda plugin, next_plugin_func: plugin.execute(target, method_name, next_plugin_func, *args, **kwargs),
                 target_driver_func,
-                None) 
+                None)
 
             context.set_success(True)
 

From 589fb8436a2a5c94fa3027c741a83b76aef50694 Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Thu, 5 Jun 2025 14:57:04 -0700
Subject: [PATCH 04/41] Fix build errors

---
 .flake8                                       |  2 ++
 aws_advanced_python_wrapper/plugin_service.py | 23 +++++++++++++++----
 ...dvanced_python_wrapper_messages.properties |  1 +
 tests/unit/test_plugin_manager.py             |  2 --
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/.flake8 b/.flake8
index 37bdecd2..847900aa 100644
--- a/.flake8
+++ b/.flake8
@@ -1,3 +1,5 @@
 [flake8]
 max-line-length = 150
 extend-select = TC, TC1
+exclude =
+    venv/,

diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py
index 3fd5eb08..53099133 100644
--- a/aws_advanced_python_wrapper/plugin_service.py
+++ b/aws_advanced_python_wrapper/plugin_service.py
@@ -14,7 +14,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, ClassVar, List, Type, TypeVar, cast
+from typing import TYPE_CHECKING, ClassVar, List, Type, TypeVar
 
 from aws_advanced_python_wrapper.aurora_initial_connection_strategy_plugin import \
     AuroraInitialConnectionStrategyPluginFactory
@@ -112,6 +112,7 @@ def plugin_manager(self, value):
 
 T = TypeVar('T')
 
+
 class PluginService(ExceptionHandler, Protocol):
     @property
     @abstractmethod
@@ -288,7 +289,7 @@ def set_status(self, clazz: Type[T], status: Optional[T], key: str):
         ...
 
     @abstractmethod
-    def get_status(self, clazz: Type[T], key: str) -> T:
+    def get_status(self, clazz: Type[T], key: str) -> Optional[T]:
         ...
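The status cache expiration above is a nanosecond value handed to CacheMap.put. A quick sanity check of the arithmetic behind the constant (hypothetical snippet, not part of the patch):

    from datetime import timedelta

    one_hour_nano = 60 * 60 * 1_000_000_000
    assert timedelta(microseconds=one_hour_nano / 1_000) == timedelta(hours=1)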
@@ -675,8 +676,22 @@ def _get_status_cache_key(self, clazz: Type[T], key: str) -> str:
         key_str = "" if key is None else key.strip().lower()
         return f"{key_str}::{clazz.__name__}"
 
-    def get_status(self, clazz: Type[T], key: str) -> T:
-        return cast(clazz, PluginServiceImpl._status_cache.get(self._get_status_cache_key(clazz, key)))
+    def get_status(self, clazz: Type[T], key: str) -> Optional[T]:
+        cache_key = self._get_status_cache_key(clazz, key)
+        status = PluginServiceImpl._status_cache.get(cache_key)
+        if status is None:
+            return None
+
+        if not isinstance(status, clazz):
+            raise ValueError(
+                Messages.get_formatted(
+                    "PluginServiceImpl.IncorrectStatusType",
+                    clazz.__name__,
+                    key,
+                    status.__class__.__name__,
+                    status))
+
+        return status
 
 
 class PluginManager(CanReleaseResources):

diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
index 3c0b341e..a77a8ec3 100644
--- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
+++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
@@ -204,6 +204,7 @@ PluginManager.ResortedPlugins=[PluginManager] Plugins order has been rearranged.
 PluginServiceImpl.FailedToRetrieveHostPort=[PluginServiceImpl] Could not retrieve Host:Port for connection. {}
 PluginServiceImpl.FillAliasesTimeout=[PluginServiceImpl] The timeout limit was reached while querying for the current host's alias.
 PluginServiceImpl.GetHostRoleConnectionNone=[PluginServiceImpl] Attempted to evaluate the host role of the given connection, but could not find a non-None connection to evaluate.
+PluginServiceImpl.IncorrectStatusType=[PluginServiceImpl] Received an unexpected type from the status cache. An object of type {} was requested, but the object at key '{}' had a type of {}. The retrieved object was: {}.
 PluginServiceImpl.NonEmptyAliases=[PluginServiceImpl] fill_aliases called when HostInfo already contains the following aliases: {}.
 PluginServiceImpl.UnableToUpdateTransactionStatus=[PluginServiceImpl] Unable to update transaction status, current connection is None.
 PluginServiceImpl.UpdateDialectConnectionNone=[PluginServiceImpl] The plugin service attempted to update the current dialect but could not identify a connection to use.
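For reviewers, a short sketch of how the status API added above is meant to be used; plugin_service and status stand in for a real PluginService instance and a status object such as BlueGreenStatus (only set_status/get_status and the key normalization come from this patch):

    # Store a status object; the cache key becomes "my-bg-id::BlueGreenStatus"
    # because keys are trimmed and lower-cased before the class name is appended.
    plugin_service.set_status(BlueGreenStatus, status, "My-BG-ID ")

    # Retrieval type-checks the cached object and raises ValueError
    # (PluginServiceImpl.IncorrectStatusType) if the stored type does not match.
    current = plugin_service.get_status(BlueGreenStatus, "my-bg-id")

    # Passing None removes the entry; a later get_status returns None.
    plugin_service.set_status(BlueGreenStatus, None, "my-bg-id")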
diff --git a/tests/unit/test_plugin_manager.py b/tests/unit/test_plugin_manager.py index aa58382c..286b3f3a 100644 --- a/tests/unit/test_plugin_manager.py +++ b/tests/unit/test_plugin_manager.py @@ -16,8 +16,6 @@ from typing import TYPE_CHECKING -from tests.unit.test_fastest_response_strategy_plugin import plugin - if TYPE_CHECKING: from aws_advanced_python_wrapper.driver_dialect import DriverDialect from aws_advanced_python_wrapper.pep249 import Connection From 704ab135fd2cba1197c0d35fd98228f63f013f06 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 5 Jun 2025 15:20:32 -0700 Subject: [PATCH 05/41] Add is_ip() --- aws_advanced_python_wrapper/plugin_service.py | 9 +++++++++ aws_advanced_python_wrapper/utils/rdsutils.py | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 53099133..ae15caca 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -146,6 +146,11 @@ def set_current_connection(self, connection: Connection, host_info: HostInfo): def current_host_info(self) -> Optional[HostInfo]: ... + @property + @abstractmethod + def original_url(self) -> str: + ... + @property @abstractmethod def initial_connection_host_info(self) -> Optional[HostInfo]: @@ -416,6 +421,10 @@ def set_current_connection(self, connection: Optional[Connection], host_info: Op def current_host_info(self) -> Optional[HostInfo]: return self._current_host_info + @property + def original_url(self) -> str: + return self._original_url + @property def initial_connection_host_info(self) -> Optional[HostInfo]: return self._initial_connection_host_info diff --git a/aws_advanced_python_wrapper/utils/rdsutils.py b/aws_advanced_python_wrapper/utils/rdsutils.py index d1f0f812..28b3f014 100644 --- a/aws_advanced_python_wrapper/utils/rdsutils.py +++ b/aws_advanced_python_wrapper/utils/rdsutils.py @@ -210,6 +210,9 @@ def get_instance_id(self, host: str) -> Optional[str]: return None + def is_ip(self, host: str) -> bool: + return self.is_ipv4(host) or self.is_ipv6(host) + def is_ipv4(self, host: str) -> bool: if host is None or not host.strip(): return False @@ -227,7 +230,7 @@ def identify_rds_type(self, host: Optional[str]) -> RdsUrlType: if host is None or not host.strip(): return RdsUrlType.OTHER - if self.is_ipv4(host) or self.is_ipv6(host): + if self.is_ip(host): return RdsUrlType.IP_ADDRESS elif self.is_writer_cluster_dns(host): return RdsUrlType.RDS_WRITER_CLUSTER From 117ce35828bb8a4bdcb50164c7991ff98627e7fc Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 10 Jun 2025 10:20:56 -0700 Subject: [PATCH 06/41] All BlueGreen classes except BlueGreenStatusProvider --- .../blue_green_plugin.py | 981 ++++++++++++++++-- .../database_dialect.py | 122 ++- aws_advanced_python_wrapper/hostinfo.py | 29 +- aws_advanced_python_wrapper/plugin_service.py | 7 +- ...dvanced_python_wrapper_messages.properties | 22 +- aws_advanced_python_wrapper/utils/atomic.py | 7 + .../utils/properties.py | 39 + aws_advanced_python_wrapper/utils/rdsutils.py | 40 + .../utils/value_container.py | 76 ++ tests/unit/test_blue_green_plugin.py | 47 +- tests/unit/test_failover_plugin.py | 4 +- tests/unit/test_hostinfo.py | 2 +- tests/unit/test_reader_failover_handler.py | 30 +- 13 files changed, 1260 insertions(+), 146 deletions(-) create mode 100644 aws_advanced_python_wrapper/utils/value_container.py diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py 
b/aws_advanced_python_wrapper/blue_green_plugin.py index 9ff14d39..a2551781 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -14,25 +14,44 @@ from __future__ import annotations +import socket +from time import perf_counter_ns +from typing import TYPE_CHECKING, FrozenSet, cast + +from aws_advanced_python_wrapper.database_dialect import BlueGreenDialect +from aws_advanced_python_wrapper.host_list_provider import HostListProvider +from aws_advanced_python_wrapper.utils.value_container import ValueContainer + +if TYPE_CHECKING: + from aws_advanced_python_wrapper.pep249 import Connection + from aws_advanced_python_wrapper.driver_dialect import DriverDialect + from aws_advanced_python_wrapper.host_list_provider import HostListProviderService + from aws_advanced_python_wrapper.plugin_service import PluginService + import time from abc import ABC, abstractmethod +from copy import copy from dataclasses import dataclass from enum import Enum, auto -from threading import Condition +from threading import Condition, Event, Thread from types import MappingProxyType -from typing import Optional, Tuple, Any, Callable, Dict, Set, NoReturn, ClassVar +from typing import (Any, Callable, ClassVar, Dict, Optional, Protocol, Set, + Tuple) -from mysql.connector import Connect - -from aws_advanced_python_wrapper.errors import AwsWrapperError, UnsupportedOperationError +from aws_advanced_python_wrapper.errors import AwsWrapperError +from aws_advanced_python_wrapper.host_availability import HostAvailability from aws_advanced_python_wrapper.hostinfo import HostInfo -from aws_advanced_python_wrapper.pep249 import Connection -from aws_advanced_python_wrapper.plugin import Plugin -from aws_advanced_python_wrapper.plugin_service import PluginService +from aws_advanced_python_wrapper.iam_plugin import IamAuthPlugin +from aws_advanced_python_wrapper.plugin import Plugin, PluginFactory +from aws_advanced_python_wrapper.utils.atomic import AtomicInt +from aws_advanced_python_wrapper.utils.concurrent import ConcurrentDict from aws_advanced_python_wrapper.utils.log import Logger from aws_advanced_python_wrapper.utils.messages import Messages -from aws_advanced_python_wrapper.utils.properties import Properties +from aws_advanced_python_wrapper.utils.properties import (Properties, + WrapperProperties) from aws_advanced_python_wrapper.utils.rdsutils import RdsUtils +from aws_advanced_python_wrapper.utils.telemetry.telemetry import \ + TelemetryTraceLevel logger = Logger(__name__) @@ -46,10 +65,10 @@ class BlueGreenIntervalRate(Enum): class BlueGreenPhase(Enum): NOT_CREATED = (0, False) CREATED = (1, False) - PREPARATION = (2, True) # nodes are accessible - IN_PROGRESS = (3, True) # active phase; nodes are not accessible - POST = (4, True) # nodes are accessible; some change are still in progress - COMPLETED = (5, True) # all changes are completed + PREPARATION = (2, True) # nodes are accessible + IN_PROGRESS = (3, True) # active phase; nodes are not accessible + POST = (4, True) # nodes are accessible; some change are still in progress + COMPLETED = (5, True) # all changes are completed def __new__(cls, value: int, is_switchover_active_or_completed: bool): obj = object.__new__(cls) @@ -103,20 +122,22 @@ def parse_role(role_str: str, version: str) -> BlueGreenRole: class BlueGreenStatus: - def __init__(self, - bg_id: str, - phase: BlueGreenPhase, - connect_routing: Tuple[ConnectRouting, ...] = (), - execute_routing: Tuple[ExecuteRouting, ...] 
= (), - role_by_endpoint: MappingProxyType[str, BlueGreenRole] = MappingProxyType({}) - ): + def __init__( + self, + bg_id: str, + phase: BlueGreenPhase, + connect_routing: Tuple[ConnectRouting, ...] = (), + execute_routing: Tuple[ExecuteRouting, ...] = (), + role_by_host: MappingProxyType[str, BlueGreenRole] = MappingProxyType({}), + node_pairs_by_host: MappingProxyType[str, Tuple[HostInfo, Optional[HostInfo]]] = MappingProxyType({})): self.bg_id = bg_id self.phase = phase - self.connect_routings = tuple(connect_routing) - self.execute_routings = tuple(execute_routing) - self.role_by_endpoint = MappingProxyType(role_by_endpoint) + self.connect_routings = connect_routing + self.execute_routings = execute_routing + self.role_by_endpoint = role_by_host + self.node_pairs_by_host = node_pairs_by_host - def get_role(self, host_info: HostInfo) -> BlueGreenRole: + def get_role(self, host_info: HostInfo) -> Optional[BlueGreenRole]: return self.role_by_endpoint.get(host_info.host.lower()) def __str__(self) -> str: @@ -145,9 +166,9 @@ class BlueGreenInterimStatus: version: str port: int start_topology: Tuple[HostInfo, ...] - start_ip_addresses_by_host_map: Dict[str, Optional[str]] + start_ip_addresses_by_host_map: ConcurrentDict[str, ValueContainer[str]] current_topology: Tuple[HostInfo, ...] - current_ip_addresses_by_host_map: Dict[str, Optional[str]] + current_ip_addresses_by_host_map: ConcurrentDict[str, ValueContainer[str]] host_names: Set[str] all_start_topology_ip_changed: bool all_start_topology_endpoints_removed: bool @@ -174,8 +195,8 @@ def get_host_tuple_hash(self, current_hash: int, host_tuple: Optional[Tuple[Host tuple_str = ",".join(sorted(x.url + x.role for x in host_tuple)) return self.get_value_hash(current_hash, tuple_str) - - def get_ip_dict_hash(self, current_hash: int, ip_dict: Optional[Dict[str, Optional[str]]]) -> int: + + def get_ip_dict_hash(self, current_hash: int, ip_dict: Optional[ConcurrentDict[str, ValueContainer[str]]]) -> int: if ip_dict is None or len(ip_dict) == 0: dict_str = "" else: @@ -224,35 +245,37 @@ def __str__(self): class ConnectRouting(ABC): @abstractmethod - def is_match(self, host_info: HostInfo, role: BlueGreenRole) -> bool: + def is_match(self, host_info: Optional[HostInfo], role: BlueGreenRole) -> bool: ... @abstractmethod - def apply(self, - plugin: Plugin, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - connect_func: Callable, - plugin_service: PluginService) -> Connection: + def apply( + self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Optional[Connection]: ... class ExecuteRouting(ABC): @abstractmethod - def is_match(self, host_info: HostInfo, role: BlueGreenRole) -> bool: + def is_match(self, host_info: Optional[HostInfo], role: BlueGreenRole) -> bool: ... @abstractmethod - def apply(self, - plugin: Plugin, - plugin_service: PluginService, - props: Properties, - target: type, - method_name: str, - execute_func: Callable, - *args: Any, - **kwargs: Any) -> Optional[Any]: + def apply( + self, + plugin: Plugin, + plugin_service: PluginService, + props: Properties, + target: type, + method_name: str, + execute_func: Callable, + *args: Any, + **kwargs: Any) -> ValueContainer[Any]: ... 
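The execute routings now return a ValueContainer rather than Optional[Any], so a routing that produced no result can be told apart from a method call that legitimately returned None. The new aws_advanced_python_wrapper/utils/value_container.py is not reproduced in this excerpt; judging from the calls made on it in this patch (of, empty, is_present, get), its interface is roughly the following sketch:

    # Illustrative sketch only - see utils/value_container.py for the real implementation.
    from typing import Generic, TypeVar

    V = TypeVar('V')


    class ValueContainer(Generic[V]):
        def __init__(self, value=None, present: bool = False):
            self._value = value
            self._present = present

        @staticmethod
        def of(value):
            # Wrap a concrete result, including a result that is None.
            return ValueContainer(value, True)

        @staticmethod
        def empty():
            # Signal that no result was produced.
            return ValueContainer()

        def is_present(self) -> bool:
            return self._present

        def get(self):
            if not self._present:
                raise ValueError("No value present")
            return self._value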
@@ -264,19 +287,19 @@ def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): self._endpoint = endpoint # host and optionally port as well self._bg_role = bg_role - def delay(self, delay_ms: int, bg_status: BlueGreenStatus, plugin_service: PluginService, bg_id: str): - end_time = time.time() + (delay_ms / 1_000) + def delay(self, delay_ms: int, bg_status: Optional[BlueGreenStatus], plugin_service: PluginService, bg_id: str): + end_time_sec = time.time() + (delay_ms / 1_000) min_delay_ms = min(delay_ms, BaseRouting._MIN_SLEEP_MS) if bg_status is None: time.sleep(delay_ms / 1_000) return - while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time: + while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time_sec: with self._cv: self._cv.wait(min_delay_ms / 1_000) - def is_match(self, host_info: HostInfo, bg_role: BlueGreenRole) -> bool: + def is_match(self, host_info: Optional[HostInfo], bg_role: BlueGreenRole) -> bool: if self._endpoint is None: return self._bg_role is None or self._bg_role == bg_role @@ -294,13 +317,14 @@ class PassThroughConnectRouting(BaseRouting, ConnectRouting): def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): super().__init__(endpoint, bg_role) - def apply(self, - plugin: Plugin, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - connect_func: Callable, - plugin_service: PluginService) -> Connection: + def apply( + self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Optional[Connection]: return connect_func() @@ -308,51 +332,830 @@ class RejectConnectRouting(BaseRouting, ConnectRouting): def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): super().__init__(endpoint, bg_role) - def apply(self, - plugin: Plugin, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - connect_func: Callable, - plugin_service: PluginService) -> Connection: + def apply( + self, + plugin: Plugin, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable, + plugin_service: PluginService) -> Optional[Connection]: raise AwsWrapperError(Messages.get("RejectConnectRouting.InProgressCantConnect")) class SubstituteConnectRouting(BaseRouting, ConnectRouting): _rds_utils: ClassVar[RdsUtils] = RdsUtils() - def __init__(self, - endpoint: Optional[str], - bg_role: Optional[BlueGreenRole], - substitute_host_info: HostInfo, - iam_hosts: Optional[Tuple[HostInfo, ...]], - on_iam_connect_func: Optional[Callable]): + def __init__( + self, + endpoint: Optional[str], + bg_role: Optional[BlueGreenRole], + substitute_host_info: HostInfo, + iam_hosts: Optional[Tuple[HostInfo, ...]], + iam_auth_success_handler: Optional[IamAuthSuccessHandler]): super().__init__(endpoint, bg_role) self._substitute_host_info = substitute_host_info self._iam_hosts = iam_hosts - self._on_iam_connect_func = on_iam_connect_func - - def apply(self, - plugin: Plugin, - host_info: HostInfo, - props: Properties, - is_initial_connection: bool, - connect_func: Callable, - plugin_service: PluginService) -> Connection: + self._iam_auth_success_handler = iam_auth_success_handler + + def __str__(self): + iam_hosts_str = ',\n '.join(str(iam_host) for iam_host in self._iam_hosts) + return (f"{self.__class__.__name__}(\n" + f" endpoint={self._endpoint},\n" + f" bg_role={self._bg_role},\n" + f" 
substitute_host_info={self._substitute_host_info},\n"
+                f"    iam_hosts=[\n"
+                f"        {iam_hosts_str}\n"
+                f"    ],\n"
+                f"    hash={hex(hash(self))}\n"
+                f")")
+
+    def apply(
+            self,
+            plugin: Plugin,
+            host_info: HostInfo,
+            props: Properties,
+            is_initial_connection: bool,
+            connect_func: Callable,
+            plugin_service: PluginService) -> Optional[Connection]:
+        if not SubstituteConnectRouting._rds_utils.is_ip(self._substitute_host_info.host):
+            return plugin_service.connect(self._substitute_host_info, props, plugin)
+
+        is_iam_in_use = plugin_service.is_plugin_in_use(IamAuthPlugin)
+        if not is_iam_in_use:
+            return plugin_service.connect(self._substitute_host_info, props, plugin)
+
+        if not self._iam_hosts:
+            raise AwsWrapperError(Messages.get("SubstituteConnectRouting.RequireIamHost"))
+
+        for iam_host in self._iam_hosts:
+            # Connect to the substitute (IP) host, but sign the IAM token for the given iam_host.
+            rerouted_host_info = copy(self._substitute_host_info)
+            rerouted_host_info.host_id = iam_host.host_id
+            rerouted_host_info.availability = HostAvailability.AVAILABLE
+            rerouted_host_info.add_alias(iam_host.host)
+
+            rerouted_props = copy(props)
+            WrapperProperties.IAM_HOST.set(rerouted_props, iam_host.host)
+            if iam_host.is_port_specified():
+                WrapperProperties.IAM_DEFAULT_PORT.set(rerouted_props, iam_host.port)
+
+            try:
+                conn = plugin_service.connect(rerouted_host_info, rerouted_props, plugin)
+                if self._iam_auth_success_handler is not None:
+                    try:
+                        self._iam_auth_success_handler.on_iam_success(iam_host.host)
+                    except Exception:
+                        pass  # do nothing
+
+                return conn
+            except AwsWrapperError as e:
+                if not plugin_service.is_login_exception(e):
+                    raise e
+                # do nothing - try with another iam host
+
+        raise AwsWrapperError(
+            Messages.get_formatted(
+                "SubstituteConnectRouting.InProgressCantOpenConnection", self._substitute_host_info.url))
+
+
+class IamAuthSuccessHandler(Protocol):
+    def on_iam_success(self, iam_host: str):
+        ...
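+
+
+# A condensed sketch of the convention the routings above follow: apply() returning
+# None (for connect routings) or an empty ValueContainer (for execute routings)
+# means "no result was produced - the caller should pick the next matching routing
+# or fall through to a direct call". Illustrative only; the real loop, which also
+# refreshes the BlueGreenStatus between attempts, lives in BlueGreenPlugin below.
+#
+#     conn = None
+#     for routing in (r for r in routings if r.is_match(host_info, bg_role)):
+#         conn = routing.apply(plugin, host_info, props, is_initial_connection,
+#                              connect_func, plugin_service)
+#         if conn is not None:
+#             break
+#     if conn is None:
+#         conn = connect_func()  # no routing matched or produced a connection
+
+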
+class SuspendConnectRouting(BaseRouting, ConnectRouting):
+    _TELEMETRY_SWITCHOVER: ClassVar[str] = "Blue/Green switchover"
+    _SLEEP_TIME_MS = 100
+
+    def __init__(
+            self,
+            endpoint: Optional[str],
+            bg_role: Optional[BlueGreenRole],
+            bg_id: str):
+        super().__init__(endpoint, bg_role)
+        self._bg_id = bg_id
+
+    def apply(
+            self,
+            plugin: Plugin,
+            host_info: HostInfo,
+            props: Properties,
+            is_initial_connection: bool,
+            connect_func: Callable,
+            plugin_service: PluginService) -> Optional[Connection]:
+        logger.debug("SuspendConnectRouting.InProgressSuspendConnect")
+
+        telemetry_factory = plugin_service.get_telemetry_factory()
+        telemetry_context = telemetry_factory.open_telemetry_context(
+            SuspendConnectRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED)
+
+        bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+        timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props)
+        start_time_sec = time.time()
+        end_time_sec = start_time_sec + timeout_ms / 1_000
+
+        try:
+            while time.time() < end_time_sec and \
+                    bg_status is not None and \
+                    bg_status.phase == BlueGreenPhase.IN_PROGRESS:
+                self.delay(SuspendConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id)
+                bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+
+            if bg_status is not None and bg_status.phase == BlueGreenPhase.IN_PROGRESS:
+                # The message reports seconds, so convert the millisecond timeout.
+                raise TimeoutError(
+                    Messages.get_formatted("SuspendConnectRouting.InProgressTryConnectLater", timeout_ms / 1_000))
+
+            logger.debug(
+                "SuspendConnectRouting.SwitchoverCompleteContinueWithConnect",
+                (time.time() - start_time_sec) * 1_000)
+        finally:
+            telemetry_context.close_context()
+
+        # return None so that the next routing can attempt a connection
+        return None
+
+
+class SuspendUntilCorrespondingNodeFoundConnectRouting(BaseRouting, ConnectRouting):
+    _TELEMETRY_SWITCHOVER: ClassVar[str] = "Blue/Green switchover"
+    _SLEEP_TIME_MS = 100
+
+    def __init__(
+            self,
+            endpoint: Optional[str],
+            bg_role: Optional[BlueGreenRole],
+            bg_id: str):
+        super().__init__(endpoint, bg_role)
+        self._bg_id = bg_id
+
+    def apply(
+            self,
+            plugin: Plugin,
+            host_info: HostInfo,
+            props: Properties,
+            is_initial_connection: bool,
+            connect_func: Callable,
+            plugin_service: PluginService) -> Optional[Connection]:
+        logger.debug("SuspendConnectRouting.WaitConnectUntilCorrespondingNodeFound", host_info.host)
+
+        telemetry_factory = plugin_service.get_telemetry_factory()
+        telemetry_context = telemetry_factory.open_telemetry_context(
+            SuspendUntilCorrespondingNodeFoundConnectRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED)
+
+        bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+        corresponding_pair = None if bg_status is None else bg_status.node_pairs_by_host.get(host_info.host)
+
+        timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props)
+        start_time_sec = time.time()
+        end_time_sec = start_time_sec + timeout_ms / 1_000
+
+        try:
+            while time.time() < end_time_sec and \
+                    bg_status is not None and \
+                    bg_status.phase != BlueGreenPhase.COMPLETED and \
+                    (corresponding_pair is None or corresponding_pair[1] is None):
+                # wait until the corresponding node is found, or until switchover is completed
+                self.delay(
+                    SuspendUntilCorrespondingNodeFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id)
+                bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+                corresponding_pair = None if bg_status is None else bg_status.node_pairs_by_host.get(host_info.host)
+
+            if bg_status is
None or bg_status.phase == BlueGreenPhase.COMPLETED:
+                logger.debug(
+                    "SuspendUntilCorrespondingNodeFoundConnectRouting.CompletedContinueWithConnect",
+                    (time.time() - start_time_sec) * 1_000)
+                return None
+
+            if time.time() > end_time_sec:
+                raise TimeoutError(
+                    Messages.get_formatted(
+                        "SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeNotFoundTryConnectLater",
+                        host_info.host,
+                        (time.time() - start_time_sec) * 1_000))
+
+            logger.debug(
+                "SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeFoundContinueWithConnect",
+                host_info.host,
+                (time.time() - start_time_sec) * 1_000)
+        finally:
+            telemetry_context.close_context()
+
+        # return None so that the next routing can attempt a connection
+        return None
+
+
 class PassThroughExecuteRouting(BaseRouting, ExecuteRouting):
     def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]):
         super().__init__(endpoint, bg_role)
 
-    def apply(self,
-              plugin: Plugin,
-              plugin_service: PluginService,
-              props: Properties,
-              target: type,
-              method_name: str,
-              execute_func: Callable,
-              *args: Any,
-              **kwargs: Any) -> Optional[Any]:
-        return execute_func()
+    def apply(
+            self,
+            plugin: Plugin,
+            plugin_service: PluginService,
+            props: Properties,
+            target: type,
+            method_name: str,
+            execute_func: Callable,
+            *args: Any,
+            **kwargs: Any) -> ValueContainer[Any]:
+        return ValueContainer.of(execute_func())
+
+
+class SuspendExecuteRouting(BaseRouting, ExecuteRouting):
+    _TELEMETRY_SWITCHOVER: ClassVar[str] = "Blue/Green switchover"
+    _SLEEP_TIME_MS = 100
+
+    def __init__(
+            self,
+            endpoint: Optional[str],
+            bg_role: Optional[BlueGreenRole],
+            bg_id: str):
+        super().__init__(endpoint, bg_role)
+        self._bg_id = bg_id
+
+    def apply(
+            self,
+            plugin: Plugin,
+            plugin_service: PluginService,
+            props: Properties,
+            target: type,
+            method_name: str,
+            execute_func: Callable,
+            *args: Any,
+            **kwargs: Any) -> ValueContainer[Any]:
+        logger.debug("SuspendExecuteRouting.InProgressSuspendMethod", method_name)
+
+        telemetry_factory = plugin_service.get_telemetry_factory()
+        telemetry_context = telemetry_factory.open_telemetry_context(
+            SuspendExecuteRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED)
+
+        bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+        timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props)
+        start_time_sec = time.time()
+        end_time_sec = start_time_sec + timeout_ms / 1_000
+
+        try:
+            while time.time() < end_time_sec and \
+                    bg_status is not None and \
+                    bg_status.phase == BlueGreenPhase.IN_PROGRESS:
+                self.delay(SuspendExecuteRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id)
+                bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id)
+
+            if bg_status is not None and bg_status.phase == BlueGreenPhase.IN_PROGRESS:
+                raise TimeoutError(
+                    Messages.get_formatted(
+                        "SuspendExecuteRouting.InProgressTryMethodLater",
+                        timeout_ms, method_name))
+
+            logger.debug(
+                "SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod",
+                method_name,
+                (time.time() - start_time_sec) * 1_000)
+        finally:
+            telemetry_context.close_context()
+
+        # return empty so that the next routing can attempt a connection
+        return ValueContainer.empty()
+
+
+class BlueGreenPlugin(Plugin):
+    _SUBSCRIBED_METHODS: Set[str] = {"connect"}
+    _CLOSE_METHODS: ClassVar[Set[str]] = {"Connection.close", "Cursor.close"}
+    _status_providers: ClassVar[ConcurrentDict[str, BlueGreenStatusProvider]] = ConcurrentDict()
+
+    def __init__(self, plugin_service:
PluginService, props: Properties): + self._plugin_service = plugin_service + self._props = props + self._telemetry_factory = plugin_service.get_telemetry_factory() + self._provider_supplier: Callable = \ + lambda _plugin_service, _props, bg_id: BlueGreenStatusProvider(_plugin_service, _props, bg_id) + self._bg_id = WrapperProperties.BG_ID.get_or_default(props).strip().lower() + self._rds_utils = RdsUtils() + self._bg_status: Optional[BlueGreenStatus] = None + self._is_iam_in_use = False + self._start_time_nano = AtomicInt(0) + self._end_time_nano = AtomicInt(0) + + self._SUBSCRIBED_METHODS.update(self._plugin_service.network_bound_methods) + + @property + def subscribed_methods(self) -> Set[str]: + return self._SUBSCRIBED_METHODS + + def connect( + self, + target_driver_func: Callable, + driver_dialect: DriverDialect, + host_info: HostInfo, + props: Properties, + is_initial_connection: bool, + connect_func: Callable) -> Connection: + self._reset_routing_time() + try: + self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) + if self._bg_status is None: + return self._open_direct_connection(connect_func, is_initial_connection) + + if is_initial_connection: + self._is_iam_in_use = self._plugin_service.is_plugin_in_use(IamAuthPlugin) + + bg_role = self._bg_status.get_role(host_info) + if bg_role is None: + # The host is not participating in BG switchover - connect directly + return self._open_direct_connection(connect_func, is_initial_connection) + + routing = next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None) + if not routing: + return self._open_direct_connection(connect_func, is_initial_connection) + + self._start_time_nano.set(perf_counter_ns()) + conn: Optional[Connection] = None + while routing is not None and conn is None: + conn = routing.apply(self, host_info, props, is_initial_connection, connect_func, self._plugin_service) + if conn is None: + self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) + if self._bg_status is None: + # TODO: should we just continue in this case? 
+                        continue
+                    routing = \
+                        next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None)
+
+            self._end_time_nano.set(perf_counter_ns())
+            if conn is None:
+                conn = connect_func()
+
+            if is_initial_connection:
+                self._init_status_provider()
+
+            return conn
+        finally:
+            if self._start_time_nano.get() > 0:
+                self._end_time_nano.compare_and_set(0, perf_counter_ns())
+
+    def _reset_routing_time(self):
+        self._start_time_nano.set(0)
+        self._end_time_nano.set(0)
+
+    def _open_direct_connection(self, connect_func: Callable, is_initial_connection: bool) -> Connection:
+        conn = connect_func()
+        if is_initial_connection:
+            self._init_status_provider()
+
+        return conn
+
+    def _init_status_provider(self):
+        self._status_providers.compute_if_absent(
+            self._bg_id,
+            lambda key: self._provider_supplier(self._plugin_service, self._props, self._bg_id))
+
+    def execute(self, target: type, method_name: str, execute_func: Callable, *args: Any, **kwargs: Any) -> Any:
+        self._reset_routing_time()
+        try:
+            self._init_status_provider()
+            if method_name in BlueGreenPlugin._CLOSE_METHODS:
+                return execute_func()
+
+            self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+            if self._bg_status is None:
+                return execute_func()
+
+            host_info = self._plugin_service.current_host_info
+            bg_role = None if host_info is None else self._bg_status.get_role(host_info)
+            if bg_role is None:
+                # The host is not participating in BG switchover - execute directly
+                return execute_func()
+
+            routing = next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None)
+            if routing is None:
+                return execute_func()
+
+            result: ValueContainer[Any] = ValueContainer.empty()
+            self._start_time_nano.set(perf_counter_ns())
+            while routing is not None and not result.is_present():
+                result = routing.apply(
+                    self,
+                    self._plugin_service,
+                    self._props,
+                    target,
+                    method_name,
+                    execute_func,
+                    *args,
+                    **kwargs)
+                if not result.is_present():
+                    self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+                    routing = None if self._bg_status is None else \
+                        next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None)
+
+            self._end_time_nano.set(perf_counter_ns())
+            if result.is_present():
+                return result.get()
+
+            return execute_func()
+        finally:
+            if self._start_time_nano.get() > 0:
+                self._end_time_nano.compare_and_set(0, perf_counter_ns())
+
+
+class BlueGreenPluginFactory(PluginFactory):
+    def get_instance(self, plugin_service: PluginService, props: Properties) -> Plugin:
+        return BlueGreenPlugin(plugin_service, props)
+
+
+class BlueGreenInterimStatusProcessor(Protocol):
+    def process_interim_status(self, role: BlueGreenRole, interim_status: BlueGreenInterimStatus):
+        ...
+
+
+class BlueGreenStatusMonitor:
+    _DEFAULT_STATUS_CHECK_INTERVAL_MS: ClassVar[int] = 5 * 60_000  # 5 minutes
+    _BG_CLUSTER_ID: ClassVar[str] = "941d00a8-8238-4f7d-bf59-771bff783a8e"
+    _LATEST_KNOWN_VERSION: ClassVar[str] = "1.0"
+    # Add more versions here if needed.
+ _KNOWN_VERSIONS: ClassVar[FrozenSet[str]] = frozenset({_LATEST_KNOWN_VERSION}) + + def __init__( + self, + bg_role: BlueGreenRole, + bg_id: str, + initial_host_info: HostInfo, + plugin_service: PluginService, + props: Properties, + status_check_intervals_ms: Dict[BlueGreenIntervalRate, int], + interim_status_processor: Optional[BlueGreenInterimStatusProcessor] = None): + self._bg_role = bg_role + self._bg_id = bg_id + self._initial_host_info = initial_host_info + self._plugin_service = plugin_service + self._props = props + self._status_check_intervals_ms = status_check_intervals_ms + self._interim_status_processor = interim_status_processor + + self._rds_utils = RdsUtils() + self._cv = Condition() + self._should_collect_ip_addresses = Event() + self._should_collect_ip_addresses.set() + self._should_collect_topology = Event() + self._should_collect_topology.set() + self._use_ip_address = Event() + self._panic_mode = Event() + self._panic_mode.set() + self._stop = Event() + self._interval_rate = BlueGreenIntervalRate.BASELINE + self._host_list_provider: Optional[HostListProvider] = None + self._start_topology: Tuple[HostInfo, ...] = () + self._current_topology: Tuple[HostInfo, ...] = () + self._start_ip_addresses_by_host: ConcurrentDict[str, ValueContainer[str]] = ConcurrentDict() + self._current_ip_addresses_by_host: ConcurrentDict[str, ValueContainer[str]] = ConcurrentDict() + self._all_start_topology_ip_changed = False + self._all_start_topology_endpoints_removed = False + self._all_topology_changed = False + self._current_phase: Optional[BlueGreenPhase] = BlueGreenPhase.NOT_CREATED + self._host_names: Set[str] = set() + self._version = "1.0" + self._port = -1 + self._connection: Optional[Connection] = None + self._connection_host_info: Optional[HostInfo] = None + self._connected_ip_address: Optional[str] = None + self._is_host_info_correct = Event() + + db_dialect = self._plugin_service.database_dialect + if not isinstance(db_dialect, BlueGreenDialect): + raise AwsWrapperError(Messages.get_formatted("BlueGreenStatusMonitor.UnexpectedDialect", db_dialect)) + + self._bg_dialect: BlueGreenDialect = cast('BlueGreenDialect', self._plugin_service.database_dialect) + + self._open_connection_thread: Optional[Thread] = None + self._monitor_thread = Thread(daemon=True, name="BlueGreenMonitorThread", target=self._run) + self._monitor_thread.start() + + def _run(self): + try: + while not self._stop.is_set(): + try: + old_phase = self._current_phase + self._open_connection() + self._collect_status() + self._collect_topology() + self._collect_ip_addresses() + self._update_ip_address_flags() + + if self._current_phase is not None and (old_phase is None or old_phase != self._current_phase): + logger.debug("BlueGreenStatusMonitor.StatusChanged", self._bg_role, self._current_phase) + + if self._interim_status_processor is not None: + self._interim_status_processor.process_interim_status( + self._bg_role, + BlueGreenInterimStatus( + self._current_phase, + self._version, + self._port, + self._start_topology, + self._start_ip_addresses_by_host, + self._current_topology, + self._current_ip_addresses_by_host, + self._host_names, + self._all_start_topology_ip_changed, + self._all_start_topology_endpoints_removed, + self._all_topology_changed) + ) + + interval_rate = BlueGreenIntervalRate.HIGH if self._panic_mode.is_set() else self._interval_rate + delay_ms = self._status_check_intervals_ms.get( + interval_rate, BlueGreenStatusMonitor._DEFAULT_STATUS_CHECK_INTERVAL_MS) + self._delay(delay_ms) + except 
Exception as e:
+                logger.warning("BlueGreenStatusMonitor.MonitoringUnhandledException", self._bg_role, e)
+        finally:
+            self._close_connection()
+            logger.debug("BlueGreenStatusMonitor.ThreadCompleted", self._bg_role)
+
+    def _open_connection(self):
+        conn = self._connection
+        if not self._is_connection_closed(conn):
+            return
+
+        if self._open_connection_thread is not None:
+            if self._open_connection_thread.is_alive():
+                return  # The task to open the connection is in progress, let's wait.
+            elif not self._panic_mode.is_set():
+                return  # The connection should be open by now since the open connection task is not running.
+
+        self._connection = None
+        self._panic_mode.set()
+        self._open_connection_thread = \
+            Thread(daemon=True, name="BlueGreenMonitorConnectionOpener", target=self._open_connection_task)
+        self._open_connection_thread.start()
+
+    def _open_connection_task(self):
+        host_info = self._connection_host_info
+        ip_address = self._connected_ip_address
+        if host_info is None:
+            self._connection_host_info = self._initial_host_info
+            host_info = self._initial_host_info
+            self._connected_ip_address = None
+            ip_address = None
+            self._is_host_info_correct.clear()
+
+        try:
+            if self._use_ip_address.is_set() and ip_address is not None:
+                ip_host_info = copy(host_info)
+                ip_host_info.host = ip_address
+                props_copy = copy(self._props)
+                WrapperProperties.IAM_HOST.set(props_copy, ip_host_info.host)
+
+                logger.debug(
+                    "BlueGreenStatusMonitor.OpeningConnectionWithIp", self._bg_role, ip_host_info.host)
+                self._connection = self._plugin_service.force_connect(ip_host_info, props_copy)
+                logger.debug(
+                    "BlueGreenStatusMonitor.OpenedConnectionWithIp", self._bg_role, ip_host_info.host)
+            else:
+                logger.debug("BlueGreenStatusMonitor.OpeningConnection", self._bg_role, host_info.host)
+                self._connection = self._plugin_service.force_connect(host_info, self._props)
+                # Unwrap the ValueContainer so _connected_ip_address stays an Optional[str].
+                connected_ip = self._get_ip_address(host_info.host)
+                self._connected_ip_address = connected_ip.get() if connected_ip.is_present() else None
+                logger.debug("BlueGreenStatusMonitor.OpenedConnection", self._bg_role, host_info.host)
+
+            self._panic_mode.clear()
+            self._notify_changes()
+        except Exception:
+            # Attempt to open connection failed.
+            self._connection = None
+            self._panic_mode.set()
+            self._notify_changes()
+
+    def _get_ip_address(self, host: str) -> ValueContainer[str]:
+        try:
+            return ValueContainer.of(socket.gethostbyname(host))
+        except socket.gaierror:
+            return ValueContainer.empty()
+
+    def _notify_changes(self):
+        with self._cv:
+            self._cv.notify_all()
+
+    def _collect_status(self):
+        conn = self._connection
+        try:
+            if self._is_connection_closed(conn):
+                return
+
+            if not self._bg_dialect.is_blue_green_status_available(conn):
+                if self._plugin_service.driver_dialect.is_closed(conn):
+                    self._connection = None
+                    self._current_phase = None
+                    self._panic_mode.set()
+                else:
+                    self._current_phase = BlueGreenPhase.NOT_CREATED
+                    logger.debug(
+                        "BlueGreenStatusMonitor.StatusNotAvailable", self._bg_role, BlueGreenPhase.NOT_CREATED)
+                return
+
+            status_entries = []
+            with conn.cursor() as cursor:
+                cursor.execute(self._bg_dialect.blue_green_status_query)
+                for record in cursor:
+                    version = record["version"]
+                    if version not in BlueGreenStatusMonitor._KNOWN_VERSIONS:
+                        self._version = BlueGreenStatusMonitor._LATEST_KNOWN_VERSION
+                        logger.warning(
+                            "BlueGreenStatusMonitor.UsesVersion", self._bg_role, version, self._version)
+
+                    endpoint = record["endpoint"]
+                    port = record["port"]
+                    bg_role = BlueGreenRole.parse_role(record["role"], self._version)
+                    phase = BlueGreenPhase.parse_phase(record["status"])
+
+                    if self._bg_role != bg_role:
+                        continue
+
+                    status_entries.append(BlueGreenDbStatusInfo(version, endpoint, port, phase, bg_role))
+
+            # Attempt to find the writer cluster status info
+            status_info = next((status for status in status_entries
+                                if self._rds_utils.is_writer_cluster_dns(status.endpoint) and
+                                self._rds_utils.is_not_old_instance(status.endpoint)),
+                               None)
+            if status_info is None:
+                # Grab an instance endpoint instead
+                status_info = next((status for status in status_entries
+                                    if self._rds_utils.is_rds_instance(status.endpoint) and
+                                    self._rds_utils.is_not_old_instance(status.endpoint)),
+                                   None)
+            else:
+                # Writer cluster endpoint has been found, add the reader cluster endpoint as well.
+                self._host_names.add(status_info.endpoint.replace(".cluster-", ".cluster-ro-"))
+
+            if status_info is None:
+                if len(status_entries) == 0:
+                    # The status table may have no entries after BGD is completed. The old1 cluster/instance has
+                    # been separated and no longer receives updates from the related green cluster/instance.
+                    if self._bg_role != BlueGreenRole.SOURCE:
+                        logger.warning("BlueGreenStatusMonitor.NoEntriesInStatusTable", self._bg_role)
+
+                self._current_phase = None
+            else:
+                self._current_phase = status_info.phase
+                self._version = status_info.version
+                self._port = status_info.port
+
+            if self._should_collect_topology.is_set():
+                current_host_names = {status.endpoint.lower() for status in status_entries
+                                      if status.endpoint is not None and
+                                      self._rds_utils.is_not_old_instance(status.endpoint)}
+                self._host_names.update(current_host_names)
+
+            if not self._is_host_info_correct.is_set() and status_info is not None:
+                # We connected to an initial host info that might not be the desired blue or green cluster. Let's check
+                # if we need to reconnect to the correct one.
+                status_ip_container = self._get_ip_address(status_info.endpoint)
+                status_info_ip_address = status_ip_container.get() if status_ip_container.is_present() else None
+                connected_ip_address = self._connected_ip_address
+                if connected_ip_address is not None and connected_ip_address != status_info_ip_address:
+                    # We are not connected to the desired blue or green cluster, we need to reconnect.
+                    self._connection_host_info = HostInfo(host=status_info.endpoint, port=status_info.port)
+                    self._is_host_info_correct.set()
+                    self._close_connection()
+                    self._panic_mode.set()
+                else:
+                    # We are already connected to the right node.
+                    self._is_host_info_correct.set()
+                    self._panic_mode.clear()
+
+            if self._is_host_info_correct.is_set() and self._host_list_provider is None:
+                # A connection to the correct cluster (blue or green) has been established. Let's initialize the host
+                # list provider.
+                self._init_host_list_provider()
+        except Exception as e:
+            if not self._is_connection_closed(self._connection):
+                # It's normal to get a connection closed error during BGD switchover, but the connection isn't closed so
+                # let's log the error.
+                logger.debug("BlueGreenStatusMonitor.UnhandledException", self._bg_role, e)
+            self._close_connection()
+            self._panic_mode.set()
+
+    def _close_connection(self):
+        conn = self._connection
+        self._connection = None
+        if conn is not None and not self._plugin_service.driver_dialect.is_closed(conn):
+            try:
+                conn.close()
+            except Exception:
+                pass
+
+    def _init_host_list_provider(self):
+        if self._host_list_provider is not None or not self._is_host_info_correct.is_set():
+            return
+
+        # We need to instantiate a separate HostListProvider with a special unique cluster ID to avoid interference with
+        # other HostListProviders opened for this cluster. Blue and Green clusters should have different cluster IDs.
+
+        props_copy = copy(self._props)
+        cluster_id = f"{self._bg_id}::{self._bg_role}::{BlueGreenStatusMonitor._BG_CLUSTER_ID}"
+        WrapperProperties.CLUSTER_ID.set(props_copy, cluster_id)
+        logger.debug("BlueGreenStatusMonitor.CreateHostListProvider", self._bg_role, cluster_id)
+
+        host_info = self._connection_host_info
+        if host_info is None:
+            logger.warning("BlueGreenStatusMonitor.HostInfoNone")
+            return
+
+        host_list_provider_supplier = self._plugin_service.database_dialect.get_host_list_provider_supplier()
+        host_list_provider_service: HostListProviderService = cast('HostListProviderService', self._plugin_service)
+        self._host_list_provider = host_list_provider_supplier(host_list_provider_service, props_copy)
+
+    def _is_connection_closed(self, conn: Optional[Connection]) -> bool:
+        return conn is None or self._plugin_service.driver_dialect.is_closed(conn)
+
+    def _delay(self, delay_ms: int):
+        start_ns = perf_counter_ns()
+        end_ns = start_ns + delay_ms * 1_000_000
+        initial_interval_rate = self._interval_rate
+        initial_panic_mode_val = self._panic_mode.is_set()
+        min_delay_sec = min(delay_ms, 50) / 1_000
+
+        while self._interval_rate == initial_interval_rate and \
+                perf_counter_ns() < end_ns and \
+                not self._stop.is_set() and \
+                initial_panic_mode_val == self._panic_mode.is_set():
+            with self._cv:
+                self._cv.wait(min_delay_sec)
+
+    def _collect_topology(self):
+        if self._host_list_provider is None:
+            return
+
+        conn = self._connection
+        if self._is_connection_closed(conn):
+            return
+
+        self._current_topology = self._host_list_provider.force_refresh(conn)
+        if self._should_collect_topology.is_set():
+            self._start_topology = self._current_topology
+
+        current_topology_copy = self._current_topology
+        if current_topology_copy is not None and self._should_collect_topology.is_set():
+            self._host_names.update({host_info.host for host_info in current_topology_copy})
+
+    def _collect_ip_addresses(self):
+        self._current_ip_addresses_by_host.clear()
+        if self._host_names is not None:
+            for host in self._host_names:
+                self._current_ip_addresses_by_host.put_if_absent(host, self._get_ip_address(host))
+
+        if self._should_collect_ip_addresses.is_set():
+            self._start_ip_addresses_by_host.clear()
+            for k, v in self._current_ip_addresses_by_host.items():
+                self._start_ip_addresses_by_host.put_if_absent(k, v)
+
+    def _update_ip_address_flags(self):
+        if self._should_collect_topology.is_set():
+            self._all_start_topology_ip_changed = False
+            self._all_start_topology_endpoints_removed = False
+            self._all_topology_changed = False
+            return
+
+        if not self._should_collect_ip_addresses.is_set():
+            # Check whether all hosts in start_topology resolve to new IP addresses
+            # TODO: do we need to make the value type equivalent to Java.Optional?
+            self._all_start_topology_ip_changed = bool(self._start_topology) and \
+                all(
+                    self._start_ip_addresses_by_host.get(node.host) is not None and
+                    self._current_ip_addresses_by_host.get(node.host) is not None and
+                    self._start_ip_addresses_by_host.get(node.host) != self._current_ip_addresses_by_host.get(node.host)
+                    for node in self._start_topology)
+
+            # Check whether all hosts in start_topology no longer have IP addresses. This indicates that the start_topology
+            # hosts can no longer be resolved because their DNS entries no longer exist.
+            self._all_start_topology_endpoints_removed = bool(self._start_topology) and \
+                all(
+                    self._start_ip_addresses_by_host.get(node.host) is not None and
+                    self._current_ip_addresses_by_host.get(node.host) is None
+                    for node in self._start_topology
+                )
+
+        if not self._should_collect_topology.is_set():
+            # Check whether all hosts in current_topology do not exist in start_topology
+            start_topology_hosts = set() if self._start_topology is None else \
+                {host_info.host for host_info in self._start_topology}
+            current_topology_copy = self._current_topology
+            self._all_topology_changed = \
+                bool(current_topology_copy) and \
+                bool(start_topology_hosts) and \
+                all(node.host not in start_topology_hosts for node in current_topology_copy)
+
+
+@dataclass
+class BlueGreenDbStatusInfo:
+    version: str
+    endpoint: str
+    port: int
+    phase: BlueGreenPhase
+    bg_role: BlueGreenRole
+
+
+class BlueGreenStatusProvider:
+    def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str):
+        self._plugin_service = plugin_service
+        self._props = props
+        self._bg_id = bg_id

diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py
index 4d997e7d..56f5a91c 100644
--- a/aws_advanced_python_wrapper/database_dialect.py
+++ b/aws_advanced_python_wrapper/database_dialect.py
@@ -24,7 +24,7 @@
 from .driver_dialect import DriverDialect
 from .exception_handling import ExceptionHandler
 
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from concurrent.futures import Executor, ThreadPoolExecutor, TimeoutError
 from contextlib import closing
 from enum import Enum, auto
@@ -190,9 +190,10 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool:
             with closing(conn.cursor()) as cursor:
                 cursor.execute(self.server_version_query)
                 for record in cursor:
-                    for column_value in record:
-                        if "mysql" in column_value.lower():
-                            return True
+                    if len(record) < 2:
+                        return False
+                    if "mysql" in record[1].lower():
+                        return True
         except Exception:
             if not initial_transaction_status and driver_dialect.is_in_transaction(conn):
                 conn.rollback()
@@ -254,18 +255,44 @@ def prepare_conn_props(self, props: Properties):
         pass
 
 
-class RdsMysqlDialect(MysqlDatabaseDialect):
+class BlueGreenDialect(ABC):
+    @property
+    @abstractmethod
+    def blue_green_status_query(self) -> str:
+        ...
+ + @abstractmethod + def is_blue_green_status_available(self, conn: Connection) -> bool: + ... + + +class RdsMysqlDialect(MysqlDatabaseDialect, BlueGreenDialect): _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_MYSQL) + _BG_STATUS_QUERY = "SELECT * FROM mysql.rds_topology" + _BG_STATUS_EXISTS_QUERY = \ + "SELECT 1 AS tmp FROM information_schema.tables WHERE table_schema = 'mysql' AND table_name = 'rds_topology'" + def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: initial_transaction_status: bool = driver_dialect.is_in_transaction(conn) try: with closing(conn.cursor()) as cursor: cursor.execute(self.server_version_query) - for record in cursor: - for column_value in record: - if "source distribution" in column_value.lower(): - return True + record = cursor.fetchone() + if record is None or len(record) < 2: + return False + + if "source distribution" != record[1].lower(): + return True + + with closing(conn.cursor()) as cursor: + cursor.execute("SHOW VARIABLES LIKE 'report_host'") + record = cursor.fetchone() + if record is None or len(record) < 2: + return False + + report_host = record[1] + return report_host is not None and report_host != "" except Exception: if not initial_transaction_status and driver_dialect.is_in_transaction(conn): conn.rollback() @@ -276,14 +303,29 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: def dialect_update_candidates(self) -> Optional[Tuple[DialectCode, ...]]: return RdsMysqlDialect._DIALECT_UPDATE_CANDIDATES + @property + def blue_green_status_query(self) -> str: + return RdsMysqlDialect._BG_STATUS_QUERY -class RdsPgDialect(PgDatabaseDialect): + def is_blue_green_status_available(self, conn: Connection) -> bool: + try: + with closing(conn.cursor()) as cursor: + cursor.execute(RdsMysqlDialect._BG_STATUS_EXISTS_QUERY) + return cursor.fetchone() is not None + except Exception: + return False + + +class RdsPgDialect(PgDatabaseDialect, BlueGreenDialect): _EXTENSIONS_QUERY = ("SELECT (setting LIKE '%rds_tools%') AS rds_tools, " "(setting LIKE '%aurora_stat_utils%') AS aurora_stat_utils " "FROM pg_settings " "WHERE name='rds.extensions'") _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_PG, DialectCode.MULTI_AZ_PG) + _BG_STATUS_QUERY = f"SELECT * FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + _BG_STATUS_EXISTS_QUERY = "SELECT 'rds_tools.show_topology'::regproc" + def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: initial_transaction_status: bool = driver_dialect.is_in_transaction(conn) if not super().is_dialect(conn, driver_dialect): @@ -309,8 +351,20 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: def dialect_update_candidates(self) -> Optional[Tuple[DialectCode, ...]]: return RdsPgDialect._DIALECT_UPDATE_CANDIDATES + @property + def blue_green_status_query(self) -> str: + return RdsPgDialect._BG_STATUS_QUERY -class AuroraMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect): + def is_blue_green_status_available(self, conn: Connection) -> bool: + try: + with closing(conn.cursor()) as cursor: + cursor.execute(RdsPgDialect._BG_STATUS_EXISTS_QUERY) + return cursor.fetchone() is not None + except Exception: + return False + + +class AuroraMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreenDialect): _DIALECT_UPDATE_CANDIDATES = (DialectCode.MULTI_AZ_MYSQL,) _TOPOLOGY_QUERY = ("SELECT SERVER_ID, CASE WHEN SESSION_ID = 'MASTER_SESSION_ID' THEN TRUE ELSE 
FALSE END, " "CPU, REPLICA_LAG_IN_MILLISECONDS, LAST_UPDATE_TIMESTAMP " @@ -320,6 +374,10 @@ class AuroraMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect): _HOST_ID_QUERY = "SELECT @@aurora_server_id" _IS_READER_QUERY = "SELECT @@innodb_read_only" + _BG_STATUS_QUERY = "SELECT * FROM mysql.rds_topology" + _BG_STATUS_EXISTS_QUERY = \ + "SELECT 1 AS tmp FROM information_schema.tables WHERE table_schema = 'mysql' AND table_name = 'rds_topology'" + @property def dialect_update_candidates(self) -> Optional[Tuple[DialectCode, ...]]: return AuroraMysqlDialect._DIALECT_UPDATE_CANDIDATES @@ -341,8 +399,20 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: def get_host_list_provider_supplier(self) -> Callable: return lambda provider_service, props: RdsHostListProvider(provider_service, props) + @property + def blue_green_status_query(self) -> str: + return AuroraMysqlDialect._BG_STATUS_QUERY + + def is_blue_green_status_available(self, conn: Connection) -> bool: + try: + with closing(conn.cursor()) as cursor: + cursor.execute(AuroraMysqlDialect._BG_STATUS_EXISTS_QUERY) + return cursor.fetchone() is not None + except Exception: + return False + -class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect): +class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreenDialect): _DIALECT_UPDATE_CANDIDATES: Tuple[DialectCode, ...] = (DialectCode.MULTI_AZ_PG,) _EXTENSIONS_QUERY = "SELECT (setting LIKE '%aurora_stat_utils%') AS aurora_stat_utils " \ @@ -360,6 +430,9 @@ class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect): _HOST_ID_QUERY = "SELECT aurora_db_instance_identifier()" _IS_READER_QUERY = "SELECT pg_is_in_recovery()" + _BG_STATUS_QUERY = f"SELECT * FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + _BG_STATUS_EXISTS_QUERY = "SELECT 'get_blue_green_fast_switchover_metadata'::regproc" + @property def dialect_update_candidates(self) -> Optional[Tuple[DialectCode, ...]]: return AuroraPgDialect._DIALECT_UPDATE_CANDIDATES @@ -396,6 +469,18 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: def get_host_list_provider_supplier(self) -> Callable: return lambda provider_service, props: RdsHostListProvider(provider_service, props) + @property + def blue_green_status_query(self) -> str: + return AuroraPgDialect._BG_STATUS_QUERY + + def is_blue_green_status_available(self, conn: Connection) -> bool: + try: + with closing(conn.cursor()) as cursor: + cursor.execute(AuroraPgDialect._BG_STATUS_EXISTS_QUERY) + return cursor.fetchone() is not None + except Exception: + return False + class MultiAzMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect): _TOPOLOGY_QUERY = "SELECT id, endpoint, port FROM mysql.rds_topology" @@ -414,8 +499,17 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: with closing(conn.cursor()) as cursor: cursor.execute(MultiAzMysqlDialect._TOPOLOGY_QUERY) records = cursor.fetchall() - if records is not None and len(records) > 0: - return True + if not records: + return False + + with closing(conn.cursor()) as cursor: + cursor.execute("SHOW VARIABLES LIKE 'report_host'") + record = cursor.fetchone() + if record is None or len(record) < 2: + return False + + report_host = record[1] + return report_host is not None and report_host != "" except Exception: if not initial_transaction_status and driver_dialect.is_in_transaction(conn): conn.rollback() diff --git 
a/aws_advanced_python_wrapper/hostinfo.py b/aws_advanced_python_wrapper/hostinfo.py index ac102fe7..a6313abd 100644 --- a/aws_advanced_python_wrapper/hostinfo.py +++ b/aws_advanced_python_wrapper/hostinfo.py @@ -34,7 +34,7 @@ class HostRole(Enum): @dataclass(eq=False) class HostInfo: NO_PORT: ClassVar[int] = -1 - DEFAULT_WEIGHT = 100 + DEFAULT_WEIGHT: ClassVar[int] = 100 def __init__( self, @@ -49,9 +49,9 @@ def __init__( self.host = host self.port = port self.role = role - self._availability = availability + self.availability = availability self.host_availability_strategy = host_availability_strategy - self.weight = weight, + self.weight = weight self.host_id = host_id self.last_update_time = last_update_time @@ -66,11 +66,22 @@ def __eq__(self, other: object): return self.host == other.host \ and self.port == other.port \ - and self._availability == other._availability \ + and self.availability == other.availability \ and self.role == other.role def __str__(self): - return f"HostInfo({self.host}, {self.port}, {self.role}, {self._availability})" + return f"HostInfo({self.host}, {self.port}, {self.role}, {self.availability})" + + def __copy__(self): + return HostInfo( + host=self.host, + port=self.port, + role=self.role, + availability=self.availability, + weight=self.weight, + host_id=self.host_id, + last_update_time=self.last_update_time + ) @property def url(self): @@ -119,15 +130,15 @@ def is_port_specified(self) -> bool: def get_availability(self) -> HostAvailability: if self.host_availability_strategy is not None: - return self.host_availability_strategy.get_host_availability(self._availability) + return self.host_availability_strategy.get_host_availability(self.availability) - return self._availability + return self.availability def get_raw_availability(self) -> HostAvailability: - return self._availability + return self.availability def set_availability(self, availability: HostAvailability): - self._availability = availability + self.availability = availability if self.host_availability_strategy is not None: self.host_availability_strategy.set_host_availability(availability) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index ae15caca..1ef46540 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -18,6 +18,8 @@ from aws_advanced_python_wrapper.aurora_initial_connection_strategy_plugin import \ AuroraInitialConnectionStrategyPluginFactory +from aws_advanced_python_wrapper.blue_green_plugin import \ + BlueGreenPluginFactory from aws_advanced_python_wrapper.custom_endpoint_plugin import \ CustomEndpointPluginFactory from aws_advanced_python_wrapper.fastest_response_strategy_plugin import \ @@ -497,6 +499,7 @@ def update_dialect(self, connection: Optional[Connection] = None): if original_dialect != self._database_dialect: host_list_provider_init = self._database_dialect.get_host_list_provider_supplier() self.host_list_provider = host_list_provider_init(self, self._props) + self.refresh_host_list(connection) def update_driver_dialect(self, connection_provider: ConnectionProvider): self._driver_dialect = self._driver_dialect_manager.get_pool_connection_driver_dialect( @@ -727,7 +730,8 @@ class PluginManager(CanReleaseResources): "dev": DeveloperPluginFactory, "federated_auth": FederatedAuthPluginFactory, "okta": OktaAuthPluginFactory, - "initial_connection": AuroraInitialConnectionStrategyPluginFactory + "initial_connection": 
AuroraInitialConnectionStrategyPluginFactory, + "bg": BlueGreenPluginFactory } WEIGHT_RELATIVE_TO_PRIOR_PLUGIN = -1 @@ -743,6 +747,7 @@ class PluginManager(CanReleaseResources): ReadWriteSplittingPluginFactory: 300, FailoverPluginFactory: 400, HostMonitoringPluginFactory: 500, + BlueGreenPluginFactory: 550, FastestResponseStrategyPluginFactory: 600, IamAuthPluginFactory: 700, AwsSecretsManagerPluginFactory: 800, diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index d7832cec..3ea84d02 100644 --- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties @@ -47,9 +47,9 @@ BlueGreenRole.UnknownVersion=[BlueGreenRole] Unknown blue/green version '{}'. BlueGreenStatusMonitor.CreateHostListProvider=[BlueGreenStatusMonitor] [{}] Creating a new HostListProvider, clusterId: {}. BlueGreenStatusMonitor.Exception=[BlueGreenStatusMonitor] [{}] currentPhase: {}, exception while querying for blue green status. -BlueGreenStatusMonitor.HostSpecNull=[BlueGreenStatusMonitor] Unable to initialize HostListProvider since connection host information is null. +BlueGreenStatusMonitor.HostInfoNone=[BlueGreenStatusMonitor] Unable to initialize HostListProvider since connection host information is None. BlueGreenStatusMonitor.Interrupted=[BlueGreenStatusMonitor] [{}] Interrupted. -BlueGreenStatusMonitor.MonitoringUnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception while monitoring blue/green status. +BlueGreenStatusMonitor.MonitoringUnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception while monitoring blue/green status: {}. BlueGreenStatusMonitor.NoEntriesInStatusTable=[BlueGreenStatusMonitor] [{}] No entries in status table. BlueGreenStatusMonitor.OpenedConnection=[BlueGreenStatusMonitor] [{}] Opened monitoring connection to {}. BlueGreenStatusMonitor.OpenedConnectionWithIp=[BlueGreenStatusMonitor] [{}] Opened monitoring connection (IP) to {}. @@ -58,8 +58,8 @@ BlueGreenStatusMonitor.OpeningConnectionWithIp=[BlueGreenStatusMonitor] [{}] Ope BlueGreenStatusMonitor.StatusChanged=[BlueGreenStatusMonitor] [{}] Status changed to: {} BlueGreenStatusMonitor.StatusNotAvailable=[BlueGreenStatusMonitor] [{}] (status not available) currentPhase: {} BlueGreenStatusMonitor.ThreadCompleted=[BlueGreenStatusMonitor] [{}] Blue/green status monitoring thread is completed. -BlueGreenStatusMonitor.UnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception. -BlueGreenStatusMonitor.UnhandledSqlException=[BlueGreenStatusMonitor] [{}] Unhandled SQLException. +BlueGreenStatusMonitor.UnexpectedDialect=[BlueGreenStatusMonitor] Attempted to create a BlueGreenStatusMonitor, but a BlueGreenDialect is required. The current dialect is {}. +BlueGreenStatusMonitor.UnhandledException=[BlueGreenStatusMonitor] [{}] Unhandled exception: {}. BlueGreenStatusMonitor.UsesVersion=[BlueGreenStatusMonitor] [{}] Blue/Green deployment uses version '{}' which the driver doesn't support. Version '{}' will be used instead. BlueGreenStatusProvider.BlueDnsCompleted=[BlueGreenStatusProvider] [bgdId: '{}'] Blue DNS update completed. @@ -337,13 +337,17 @@ StaleDnsPlugin.RequireDynamicProvider=[StaleDnsPlugin] A dynamic host list provi SubstituteConnectRouting.InProgressCantOpenConnection=[SubstituteConnectRouting] Blue/Green Deployment switchover is in progress. 
Can't establish connection to '{}'. SubstituteConnectRouting.RequireIamHost=[SubstituteConnectRouting] Connecting with IP address when IAM authentication is enabled requires an 'iamHost' parameter. -SuspendConnectRouting.InProgressHoldConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. -SuspendConnectRouting.InProgressTryConnectLater=[SuspendConnectRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try to connect again later. +SuspendConnectRouting.InProgressSuspendConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. +SuspendConnectRouting.InProgressTryConnectLater=[SuspendConnectRouting] Blue/Green Deployment switchover is still in progress after {} seconds. Try to connect again later. SuspendConnectRouting.SwitchoverCompleteContinueWithConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is completed. Continue with connect call. The call was held for {} ms. -SuspendExecuteRouting.InProgressHoldMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is in progress. Hold '{}' call until switchover is completed. -SuspendExecuteRouting.StillInProgressTryMethodLater=[SuspendExecuteRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try '{}' again later. -SuspendExecuteRouting.SwitchoverCompletedContinueWithMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is completed. Continue with '{}' call. The call was held for {} ms. +SuspendExecuteRouting.InProgressSuspendMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is in progress. Suspend '{}' call until switchover is completed. +SuspendExecuteRouting.InProgressTryMethodLater=[SuspendExecuteRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try '{}' again later. +SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is completed. Continue with '{}' call. The call was held for {} ms. + +SuspendUntilCorrespondingNodeFoundConnectRouting.CompletedContinueWithConnect=[SuspendUntilCorrespondingNodeFoundConnectRouting] Blue/Green Deployment status is completed. Continue with 'connect' call. The call was held for {} ms. +SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeFoundContinueWithConnect=[SuspendUntilCorrespondingNodeFoundConnectRouting] The corresponding node for '{}' was found. Continue with 'connect' call. The call was held for {} ms. +SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeNotFoundTryConnectLater=[SuspendUntilCorrespondingNodeFoundConnectRouting] Blue/Green Deployment switchover is still in progress and the corresponding node for '{}' was not found after {} ms. Try to connect again later. Testing.CantParse=[Testing] Can't parse {}. Testing.DisabledConnectivity=[Testing] Disabled connectivity to {}.
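A rough usage sketch of how the new plugin is enabled through the existing AwsWrapperConnection API (the endpoint and credentials are hypothetical placeholders; the plugin code "bg" and the bg_* property names come from the plugin_service.py and properties.py hunks below):

    import psycopg
    from aws_advanced_python_wrapper import AwsWrapperConnection

    # "bg" is the code the BlueGreenPluginFactory is registered under.
    with AwsWrapperConnection.connect(
            psycopg.Connection.connect,
            host="mydb.cluster-abc123.us-east-1.rds.amazonaws.com",  # hypothetical endpoint
            dbname="postgres",
            user="user",
            password="password",
            plugins="bg",
            bg_id="1") as awsconn:
        cursor = awsconn.cursor()
        cursor.execute("SELECT 1")
        cursor.fetchone()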
diff --git a/aws_advanced_python_wrapper/utils/atomic.py b/aws_advanced_python_wrapper/utils/atomic.py index 62eba40c..13888ad0 100644 --- a/aws_advanced_python_wrapper/utils/atomic.py +++ b/aws_advanced_python_wrapper/utils/atomic.py @@ -49,3 +49,10 @@ def decrement_and_get(self): with self._lock: self._value -= 1 return self._value + + def compare_and_set(self, expected_value: int, new_value: int) -> bool: + with self._lock: + if self._value == expected_value: + self._value = new_value + return True + return False diff --git a/aws_advanced_python_wrapper/utils/properties.py b/aws_advanced_python_wrapper/utils/properties.py index bfa02038..2fe4edd6 100644 --- a/aws_advanced_python_wrapper/utils/properties.py +++ b/aws_advanced_python_wrapper/utils/properties.py @@ -29,11 +29,19 @@ def __init__(self, name: str, description: str, default_value: Optional[Any] = N self.default_value = default_value self.description = description + def __str__(self): + return f"WrapperProperty(name={self.name}, default_value={self.default_value})" + def get(self, props: Properties) -> Optional[str]: if self.default_value: return props.get(self.name, self.default_value) return props.get(self.name) + def get_or_default(self, props: Properties) -> str: + if not self.default_value: + raise ValueError(f"No default value found for property {self}") + return props.get(self.name, self.default_value) + def get_int(self, props: Properties) -> int: if self.default_value: return int(props.get(self.name, self.default_value)) @@ -321,6 +329,37 @@ class WrapperProperties: "Interval in milliseconds between measuring response time to a database host", 30_000) + # Blue/Green + BG_CONNECT_TIMEOUT_MS = WrapperProperty( + "bg_connect_timeout_ms", + "Connect timeout (in msec) during Blue/Green Deployment switchover.", + 30_000) + BG_ID = WrapperProperty( + "bg_id", + "Blue/Green Deployment identifier that helps the driver to distinguish different deployments.", + "1") + BG_INTERVAL_BASELINE_MS = WrapperProperty( + "bg_interval_baseline_ms", + "Baseline Blue/Green Deployment status checking interval (in msec).", + 60_000) + BG_INTERVAL_INCREASED_MS = WrapperProperty( + "bg_interval_increased_ms", + "Increased Blue/Green Deployment status checking interval (in msec).", + 1_000) + BG_INTERVAL_HIGH_MS = WrapperProperty( + "bg_interval_high_ms", + "High Blue/Green Deployment status checking interval (in msec).", + 100) + BG_SWITCHOVER_TIMEOUT_MS = WrapperProperty( + "bg_switchover_timeout_ms", + "Blue/Green Deployment switchover timeout (in msec).", + 180_000) # 3 minutes + BG_SUSPEND_NEW_BLUE_CONNECTIONS = WrapperProperty( + "bg_suspend_new_blue_connections", + "Enables Blue/Green Deployment switchover to suspend new blue connection requests while the " + "switchover process is in progress.", + False) + # Telemetry ENABLE_TELEMETRY = WrapperProperty( "enable_telemetry", diff --git a/aws_advanced_python_wrapper/utils/rdsutils.py b/aws_advanced_python_wrapper/utils/rdsutils.py index 28b3f014..2eac080c 100644 --- a/aws_advanced_python_wrapper/utils/rdsutils.py +++ b/aws_advanced_python_wrapper/utils/rdsutils.py @@ -110,6 +110,10 @@ class RdsUtils: IP_V6 = r"^[0-9a-fA-F]{1,4}(:[0-9a-fA-F]{1,4}){7}" IP_V6_COMPRESSED = r"^(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,5})?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,5})?)" + BG_OLD_HOST_PATTERN = r".*(?P<prefix>-old1)\." + BG_GREEN_HOST_PATTERN = r".*(?P<prefix>-green-[0-9a-z]{6})\."
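+ # Hypothetical examples: "myhost-green-1a2b3c.cluster-xyz.us-east-1.rds.amazonaws.com" matches BG_GREEN_HOST_PATTERN (prefix group "-green-1a2b3c"), while a bare instance ID such as "myhost-green-1a2b3c" has no trailing dot and is only caught by BG_GREEN_HOST_ID_PATTERN below.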
+ BG_GREEN_HOST_ID_PATTERN = r"(.*)-green-[0-9a-z]{6}" + DNS_GROUP = "dns" DOMAIN_GROUP = "domain" INSTANCE_GROUP = "instance" @@ -245,6 +249,42 @@ def identify_rds_type(self, host: Optional[str]) -> RdsUrlType: return RdsUrlType.OTHER + def is_green_instance(self, host: str) -> bool: + if not host: + return False + + return search(RdsUtils.BG_GREEN_HOST_PATTERN, host) is not None + + def is_not_old_instance(self, host: str) -> bool: + if host is None or not host.strip(): + return False + return search(RdsUtils.BG_OLD_HOST_PATTERN, host) is None + + def is_not_green_or_old_instance(self, host: str) -> bool: + if not host: + return False + + return search(RdsUtils.BG_GREEN_HOST_PATTERN, host) is None and \ + search(RdsUtils.BG_OLD_HOST_PATTERN, host) is None + + def remove_green_instance_prefix(self, host: str) -> str: + if not host: + return host + + host_match = search(RdsUtils.BG_GREEN_HOST_PATTERN, host) + if host_match is None: + host_id_match = search(RdsUtils.BG_GREEN_HOST_ID_PATTERN, host) + if host_id_match: + return host_id_match.group(1) + else: + return host + + prefix = host_match.group("prefix") + if not prefix: + return host + + return host.replace(f"{prefix}.", ".") + def _find(self, host: str, patterns: list): if not host or not host.strip(): return None diff --git a/aws_advanced_python_wrapper/utils/value_container.py b/aws_advanced_python_wrapper/utils/value_container.py new file mode 100644 index 00000000..a49f8087 --- /dev/null +++ b/aws_advanced_python_wrapper/utils/value_container.py @@ -0,0 +1,76 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +from typing import Generic, TypeVar, Union, cast + +V = TypeVar('V') + + +class Empty(object): + """An empty sentinel object used to differentiate between None vs an empty value.""" + pass + + +class ValueContainer(Generic[V]): + """A container object which may or may not contain a non-None value.""" + + # Sentinel object to represent an empty ValueContainer + _EMPTY = Empty() + + def __init__(self, value: Union[Empty, V] = _EMPTY): + self._value = value + + @classmethod + def of(cls, value: V) -> 'ValueContainer[V]': + """Returns a ValueContainer with the specified non-None value.""" + if value is None: + raise ValueError("Value cannot be None") + return cls(value) + + @classmethod + def empty(cls) -> 'ValueContainer[V]': + """Returns an empty ValueContainer instance.""" + return cls() + + def is_present(self) -> bool: + """Returns true if a value is present.""" + return self._value is not self._EMPTY + + def is_empty(self) -> bool: + """Returns true if no value is present.""" + return self._value is self._EMPTY + + def get(self) -> V: + """Returns the value if present, otherwise raises ValueError.""" + if self._value is self._EMPTY: + raise ValueError("No value present") + return cast('V', self._value) + + def or_else(self, other: V) -> V: + """Returns the value if present, otherwise returns other.""" + return cast('V', self._value) if self.is_present() else other + + def __eq__(self, other: object) -> bool: + """Checks if this ValueContainer is equal to another object.""" + if not isinstance(other, ValueContainer): + return False + if self.is_empty() and other.is_empty(): + return True + if self.is_empty() or other.is_empty(): + return False + return self._value == other._value + + def __str__(self) -> str: + """Returns a string representation of this ValueContainer.""" + return "ValueContainer.empty" if self.is_empty() else f"ValueContainer[{self._value}]" diff --git a/tests/unit/test_blue_green_plugin.py b/tests/unit/test_blue_green_plugin.py index e42f31bc..478bdb8f 100644 --- a/tests/unit/test_blue_green_plugin.py +++ b/tests/unit/test_blue_green_plugin.py @@ -11,12 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import time +from copy import deepcopy +from datetime import datetime from types import MappingProxyType -from aws_advanced_python_wrapper.blue_green_plugin import PassThroughConnectRouting, BlueGreenRole, \ - PassThroughExecuteRouting, BlueGreenStatus, BlueGreenPhase, BlueGreenInterimStatus -from aws_advanced_python_wrapper.hostinfo import HostInfo +from aws_advanced_python_wrapper.blue_green_plugin import ( + BlueGreenInterimStatus, BlueGreenPhase, BlueGreenRole, BlueGreenStatus, + PassThroughConnectRouting, PassThroughExecuteRouting, + SubstituteConnectRouting) +from aws_advanced_python_wrapper.host_availability import HostAvailability +from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole +from aws_advanced_python_wrapper.utils.concurrent import ConcurrentDict +from aws_advanced_python_wrapper.utils.value_container import ValueContainer + # TODO: remove unnecessary tests def test_status_str(): @@ -32,15 +40,19 @@ def test_status_str(): BlueGreenStatus("asdf", BlueGreenPhase.PREPARATION, connect_routings, execute_routings, role_by_endpoint)) print(f"\n{status}") + def test_interim_status_str(): + start_ips = ConcurrentDict() + start_ips.put_if_absent("instance-1", ValueContainer.of("1.1.1.1")) + start_ips.put_if_absent("instance-2", ValueContainer.empty()) status = BlueGreenInterimStatus( BlueGreenPhase.CREATED, "1.0", 5432, (HostInfo("instance-1"), HostInfo("instance-2")), - {"instance-1": "1.1.1.1", "instance-2": None}, + start_ips, (HostInfo("instance-1"), HostInfo("instance-2")), - {"instance-1": "1.1.1.1", "instance-2": None}, + start_ips, {"instance-1", "instance-2"}, True, True, @@ -49,3 +61,26 @@ def test_interim_status_str(): print(f"\n{status}") + +def test_substitute_connect_routing(): + example_host = HostInfo("instance-1sdfsaklfdjsaklfdjsaklfjslkdfjslkdfjsa", 5432, HostRole.WRITER, HostAvailability.AVAILABLE) + iam_hosts = (example_host, example_host, example_host) + routing = SubstituteConnectRouting( + "instance-1:5432", + BlueGreenRole.SOURCE, + example_host, + iam_hosts, + lambda host: None + ) + + print(f"\n{routing}") + + +def test_host_copy(): + h1 = HostInfo("localhost", 5432, HostRole.READER, HostAvailability.UNAVAILABLE, weight=5, host_id="localhost", last_update_time=datetime.now()) + h2 = deepcopy(h1) + assert h1 == h2 + + +def test_time(): + print(time.time()) diff --git a/tests/unit/test_failover_plugin.py b/tests/unit/test_failover_plugin.py index 4610b316..a9e0c856 100644 --- a/tests/unit/test_failover_plugin.py +++ b/tests/unit/test_failover_plugin.py @@ -240,7 +240,7 @@ def test_failover_writer(plugin_service_mock, host_list_provider_service_mock, i def test_failover_reader_with_valid_failed_host(plugin_service_mock, host_list_provider_service_mock, init_host_provider_func_mock, conn_mock, reader_failover_handler_mock): host: HostInfo = HostInfo("host") - host._availability = HostAvailability.AVAILABLE + host.availability = HostAvailability.AVAILABLE host._aliases = ["alias1", "alias2"] hosts: Tuple[HostInfo, ...] 
= (host, ) type(plugin_service_mock).hosts = PropertyMock(return_value=hosts) @@ -262,7 +262,7 @@ def test_failover_reader_with_valid_failed_host(plugin_service_mock, host_list_p def test_failover_reader_with_no_failed_host(plugin_service_mock, host_list_provider_service_mock, init_host_provider_func_mock, reader_failover_handler_mock): host: HostInfo = HostInfo("host") - host._availability = HostAvailability.AVAILABLE + host.availability = HostAvailability.AVAILABLE host._aliases = ["alias1", "alias2"] hosts: Tuple[HostInfo, ...] = (host, ) type(plugin_service_mock).hosts = PropertyMock(return_value=hosts) diff --git a/tests/unit/test_hostinfo.py b/tests/unit/test_hostinfo.py index 29249ad9..cab1fdfa 100644 --- a/tests/unit/test_hostinfo.py +++ b/tests/unit/test_hostinfo.py @@ -27,7 +27,7 @@ def test_host_info_defaults(): assert len(host_info.aliases) == 0 assert len(host_info._all_aliases) == 1 assert host_info.role == HostRole.WRITER - assert host_info._availability == HostAvailability.AVAILABLE + assert host_info.availability == HostAvailability.AVAILABLE assert list(host_info._all_aliases)[0] == "testhost" diff --git a/tests/unit/test_reader_failover_handler.py b/tests/unit/test_reader_failover_handler.py index 180a8770..c43e005f 100644 --- a/tests/unit/test_reader_failover_handler.py +++ b/tests/unit/test_reader_failover_handler.py @@ -86,8 +86,8 @@ def force_connect_side_effect(host_info, properties, timeout_event) -> Connectio plugin_service_mock.force_connect.side_effect = force_connect_side_effect plugin_service_mock.is_network_exception.return_value = True - hosts[2]._availability = HostAvailability.UNAVAILABLE - hosts[4]._availability = HostAvailability.UNAVAILABLE + hosts[2].availability = HostAvailability.UNAVAILABLE + hosts[4].availability = HostAvailability.UNAVAILABLE target: ReaderFailoverHandler = ReaderFailoverHandlerImpl(plugin_service_mock, props) result: ReaderFailoverResult = target.failover(hosts, current_host) @@ -115,8 +115,8 @@ def force_connect_side_effect(host_info, properties, timeout_event) -> Connectio plugin_service_mock.force_connect.side_effect = force_connect_side_effect - hosts[2]._availability = HostAvailability.UNAVAILABLE - hosts[4]._availability = HostAvailability.UNAVAILABLE + hosts[2].availability = HostAvailability.UNAVAILABLE + hosts[4].availability = HostAvailability.UNAVAILABLE # Set max failover timeout to 5 seconds target: ReaderFailoverHandler = ReaderFailoverHandlerImpl(plugin_service_mock, props, 5, 30) @@ -222,9 +222,9 @@ def force_connect_side_effect(host_info, properties, timeout_event) -> Connectio def test_get_host_tuples_by_priority(plugin_service_mock, connection_mock, default_properties, default_hosts): hosts = default_hosts - hosts[2]._availability = HostAvailability.UNAVAILABLE - hosts[4]._availability = HostAvailability.UNAVAILABLE - hosts[5]._availability = HostAvailability.UNAVAILABLE + hosts[2].availability = HostAvailability.UNAVAILABLE + hosts[4].availability = HostAvailability.UNAVAILABLE + hosts[5].availability = HostAvailability.UNAVAILABLE hosts_by_priority = ReaderFailoverHandlerImpl.get_hosts_by_priority(hosts, False) @@ -233,7 +233,7 @@ def test_get_host_tuples_by_priority(plugin_service_mock, connection_mock, defau # expecting active readers while (i < len(hosts_by_priority) and hosts_by_priority[i].role == HostRole.READER and - hosts_by_priority[i]._availability == HostAvailability.AVAILABLE): + hosts_by_priority[i].availability == HostAvailability.AVAILABLE): i += 1 # expecting a writer @@ -243,7 +243,7 @@ def 
test_get_host_tuples_by_priority(plugin_service_mock, connection_mock, defau # expecting down readers while (i < len(hosts_by_priority) and hosts_by_priority[i].role == HostRole.READER and - hosts_by_priority[i]._availability == HostAvailability.UNAVAILABLE): + hosts_by_priority[i].availability == HostAvailability.UNAVAILABLE): i += 1 assert i == len(hosts_by_priority) @@ -252,9 +252,9 @@ def test_get_host_tuples_by_priority(plugin_service_mock, connection_mock, defau def test_get_reader_tuples_by_priority(plugin_service_mock, connection_mock, default_properties, default_hosts): hosts = default_hosts - hosts[2]._availability = HostAvailability.UNAVAILABLE - hosts[4]._availability = HostAvailability.UNAVAILABLE - hosts[5]._availability = HostAvailability.UNAVAILABLE + hosts[2].availability = HostAvailability.UNAVAILABLE + hosts[4].availability = HostAvailability.UNAVAILABLE + hosts[5].availability = HostAvailability.UNAVAILABLE hosts_by_priority = ReaderFailoverHandlerImpl.get_reader_hosts_by_priority(hosts) @@ -263,13 +263,13 @@ def test_get_reader_tuples_by_priority(plugin_service_mock, connection_mock, def # expecting active readers while (i < len(hosts_by_priority) and hosts_by_priority[i].role == HostRole.READER and - hosts_by_priority[i]._availability == HostAvailability.AVAILABLE): + hosts_by_priority[i].availability == HostAvailability.AVAILABLE): i += 1 # expecting down readers while (i < len(hosts_by_priority) and hosts_by_priority[i].role == HostRole.READER and - hosts_by_priority[i]._availability == HostAvailability.UNAVAILABLE): + hosts_by_priority[i].availability == HostAvailability.UNAVAILABLE): i += 1 assert i == len(hosts_by_priority) @@ -285,7 +285,7 @@ def test_host_failover_strict_reader_enabled(plugin_service_mock, connection_moc assert hosts_by_priority == (reader, ) # should select the reader even if unavailable - reader._availability = HostAvailability.UNAVAILABLE + reader.availability = HostAvailability.UNAVAILABLE hosts_by_priority = ReaderFailoverHandlerImpl.get_hosts_by_priority(hosts, True) assert hosts_by_priority == (reader,) From 026233852d220753adb1f9a72e2cd900f1fa1143 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 12 Jun 2025 13:43:21 -0700 Subject: [PATCH 07/41] Done BlueGreenStatusProvider, mypy/isort/flake8 pass --- .../blue_green_plugin.py | 873 ++++++++++++++++-- ...dvanced_python_wrapper_messages.properties | 1 + .../utils/concurrent.py | 42 +- .../utils/properties.py | 4 +- tests/unit/test_blue_green_plugin.py | 86 -- 5 files changed, 840 insertions(+), 166 deletions(-) delete mode 100644 tests/unit/test_blue_green_plugin.py diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index a2551781..34429d77 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -15,8 +15,9 @@ from __future__ import annotations import socket +from datetime import datetime from time import perf_counter_ns -from typing import TYPE_CHECKING, FrozenSet, cast +from typing import TYPE_CHECKING, FrozenSet, List, cast from aws_advanced_python_wrapper.database_dialect import BlueGreenDialect from aws_advanced_python_wrapper.host_list_provider import HostListProvider @@ -33,18 +34,19 @@ from copy import copy from dataclasses import dataclass from enum import Enum, auto -from threading import Condition, Event, Thread -from types import MappingProxyType +from threading import Condition, Event, RLock, Thread from typing import (Any, Callable, ClassVar, 
Dict, Optional, Protocol, Set, Tuple) -from aws_advanced_python_wrapper.errors import AwsWrapperError +from aws_advanced_python_wrapper.errors import (AwsWrapperError, + UnsupportedOperationError) from aws_advanced_python_wrapper.host_availability import HostAvailability -from aws_advanced_python_wrapper.hostinfo import HostInfo +from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole from aws_advanced_python_wrapper.iam_plugin import IamAuthPlugin from aws_advanced_python_wrapper.plugin import Plugin, PluginFactory from aws_advanced_python_wrapper.utils.atomic import AtomicInt -from aws_advanced_python_wrapper.utils.concurrent import ConcurrentDict +from aws_advanced_python_wrapper.utils.concurrent import (ConcurrentDict, + ConcurrentSet) from aws_advanced_python_wrapper.utils.log import Logger from aws_advanced_python_wrapper.utils.messages import Messages from aws_advanced_python_wrapper.utils.properties import (Properties, @@ -88,19 +90,19 @@ def parse_phase(phase_str: Optional[str]) -> BlueGreenPhase: if not phase_str: return BlueGreenPhase.NOT_CREATED - match phase_str.upper(): - case "AVAILABLE": - return BlueGreenPhase.CREATED - case "SWITCHOVER_INITIATED": - return BlueGreenPhase.PREPARATION - case "SWITCHOVER_IN_PROGRESS": - return BlueGreenPhase.IN_PROGRESS - case "SWITCHOVER_IN_POST_PROCESSING": - return BlueGreenPhase.POST - case "SWITCHOVER_COMPLETED": - return BlueGreenPhase.COMPLETED - case _: - raise ValueError(Messages.get_formatted("BlueGreenPhase.UnknownStatus", phase_str)) + phase_upper = phase_str.upper() + if phase_upper == "AVAILABLE": + return BlueGreenPhase.CREATED + elif phase_upper == "SWITCHOVER_INITIATED": + return BlueGreenPhase.PREPARATION + elif phase_upper == "SWITCHOVER_IN_PROGRESS": + return BlueGreenPhase.IN_PROGRESS + elif phase_upper == "SWITCHOVER_IN_POST_PROCESSING": + return BlueGreenPhase.POST + elif phase_upper == "SWITCHOVER_COMPLETED": + return BlueGreenPhase.COMPLETED + else: + raise ValueError(Messages.get_formatted("BlueGreenPhase.UnknownStatus", phase_str)) class BlueGreenRole(Enum): @@ -112,13 +114,12 @@ def parse_role(role_str: str, version: str) -> BlueGreenRole: if "1.0" != version: raise ValueError(Messages.get_formatted("BlueGreenRole.UnknownVersion", version)) - match role_str: - case "BLUE_GREEN_DEPLOYMENT_SOURCE": - return BlueGreenRole.SOURCE - case "BLUE_GREEN_DEPLOYMENT_TARGET": - return BlueGreenRole.TARGET - case _: - raise ValueError(Messages.get_formatted("BlueGreenRole.UnknownRole", role_str)) + if role_str == "BLUE_GREEN_DEPLOYMENT_SOURCE": + return BlueGreenRole.SOURCE + elif role_str == "BLUE_GREEN_DEPLOYMENT_TARGET": + return BlueGreenRole.TARGET + else: + raise ValueError(Messages.get_formatted("BlueGreenRole.UnknownRole", role_str)) class BlueGreenStatus: @@ -126,24 +127,31 @@ def __init__( self, bg_id: str, phase: BlueGreenPhase, - connect_routing: Tuple[ConnectRouting, ...] = (), - execute_routing: Tuple[ExecuteRouting, ...] 
= (), - role_by_host: MappingProxyType[str, BlueGreenRole] = MappingProxyType({}), - node_pairs_by_host: MappingProxyType[str, Tuple[HostInfo, Optional[HostInfo]]] = MappingProxyType({})): + connect_routings: Optional[List[ConnectRouting]] = None, + execute_routings: Optional[List[ExecuteRouting]] = None, + role_by_host: Optional[ConcurrentDict[str, BlueGreenRole]] = None, + corresponding_nodes: Optional[ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]]] = None): self.bg_id = bg_id self.phase = phase - self.connect_routings = connect_routing - self.execute_routings = execute_routing - self.role_by_endpoint = role_by_host - self.node_pairs_by_host = node_pairs_by_host + self.connect_routings = [] if connect_routings is None else list(connect_routings) + self.execute_routings = [] if execute_routings is None else list(execute_routings) + self.roles_by_endpoint: ConcurrentDict[str, BlueGreenRole] = ConcurrentDict() + if role_by_host is not None: + self.roles_by_endpoint.put_all(role_by_host) + + self.corresponding_nodes: ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]] = ConcurrentDict() + if corresponding_nodes is not None: + self.corresponding_nodes.put_all(corresponding_nodes) + + self.cv = Condition() def get_role(self, host_info: HostInfo) -> Optional[BlueGreenRole]: - return self.role_by_endpoint.get(host_info.host.lower()) + return self.roles_by_endpoint.get(host_info.host.lower()) def __str__(self) -> str: connect_routings_str = ',\n '.join(str(cr) for cr in self.connect_routings) execute_routings_str = ',\n '.join(str(er) for er in self.execute_routings) - role_mappings = ',\n '.join(f"{endpoint}: {role}" for endpoint, role in self.role_by_endpoint.items()) + role_mappings = ',\n '.join(f"{endpoint}: {role}" for endpoint, role in self.roles_by_endpoint.items()) return (f"{self.__class__.__name__}(\n" f" id='{self.bg_id}',\n" @@ -283,7 +291,6 @@ class BaseRouting: _MIN_SLEEP_MS = 50 def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): - self._cv = Condition() self._endpoint = endpoint # host and optionally port as well self._bg_role = bg_role @@ -296,8 +303,8 @@ def delay(self, delay_ms: int, bg_status: Optional[BlueGreenStatus], plugin_serv return while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time_sec: - with self._cv: - self._cv.wait(min_delay_ms / 1_000) + with bg_status.cv: + bg_status.cv.wait(min_delay_ms / 1_000) def is_match(self, host_info: Optional[HostInfo], bg_role: BlueGreenRole) -> bool: if self._endpoint is None: @@ -348,11 +355,11 @@ class SubstituteConnectRouting(BaseRouting, ConnectRouting): def __init__( self, - endpoint: Optional[str], - bg_role: Optional[BlueGreenRole], substitute_host_info: HostInfo, - iam_hosts: Optional[Tuple[HostInfo, ...]], - iam_auth_success_handler: Optional[IamAuthSuccessHandler]): + endpoint: Optional[str] = None, + bg_role: Optional[BlueGreenRole] = None, + iam_hosts: Optional[Tuple[HostInfo, ...]] = None, + iam_auth_success_handler: Optional[Callable[[str], None]] = None): super().__init__(endpoint, bg_role) self._substitute_host_info = substitute_host_info self._iam_hosts = iam_hosts @@ -361,9 +368,9 @@ def __init__( def __str__(self): iam_hosts_str = ',\n '.join(str(iam_host) for iam_host in self._iam_hosts) return (f"{self.__class__.__name__}(\n" + f" substitute_host_info={self._substitute_host_info},\n" f" endpoint={self._endpoint},\n" f" bg_role={self._bg_role},\n" - f" substitute_host_info={self._substitute_host_info},\n" f" iam_hosts=[\n" f" 
{iam_hosts_str}\n" f" ],\n" @@ -403,7 +410,7 @@ def apply( conn = plugin_service.connect(rerouted_host_info, rerouted_props) if self._iam_auth_success_handler is not None: try: - self._iam_auth_success_handler.on_iam_success(iam_host.host) + self._iam_auth_success_handler(iam_host.host) except Exception: pass # do nothing @@ -503,7 +510,7 @@ def apply( SuspendUntilCorrespondingNodeFoundConnectRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED) bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) - corresponding_pair = None if bg_status is None else bg_status.node_pairs_by_host.get(host_info.host) + corresponding_pair = None if bg_status is None else bg_status.corresponding_nodes.get(host_info.host) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() @@ -518,7 +525,7 @@ def apply( self.delay( SuspendUntilCorrespondingNodeFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) - corresponding_pair = None if bg_status is None else bg_status.node_pairs_by_host.get(host_info.host) + corresponding_pair = None if bg_status is None else bg_status.corresponding_nodes.get(host_info.host) if bg_status is None or bg_status.phase == BlueGreenPhase.COMPLETED: logger.debug( @@ -629,14 +636,14 @@ def __init__(self, plugin_service: PluginService, props: Properties): self._plugin_service = plugin_service self._props = props self._telemetry_factory = plugin_service.get_telemetry_factory() - self._provider_supplier: Callable = \ + self._provider_supplier: Callable[[PluginService, Properties, str], BlueGreenStatusProvider] = \ lambda _plugin_service, _props, bg_id: BlueGreenStatusProvider(_plugin_service, _props, bg_id) self._bg_id = WrapperProperties.BG_ID.get_or_default(props).strip().lower() self._rds_utils = RdsUtils() self._bg_status: Optional[BlueGreenStatus] = None self._is_iam_in_use = False - self._start_time_nano = AtomicInt(0) - self._end_time_nano = AtomicInt(0) + self._start_time_ns = AtomicInt(0) + self._end_time_ns = AtomicInt(0) self._SUBSCRIBED_METHODS.update(self._plugin_service.network_bound_methods) @@ -670,7 +677,7 @@ def connect( if not routing: return self._open_direct_connection(connect_func, is_initial_connection) - self._start_time_nano.set(perf_counter_ns()) + self._start_time_ns.set(perf_counter_ns()) conn: Optional[Connection] = None while routing is not None and conn is None: conn = routing.apply(self, host_info, props, is_initial_connection, connect_func, self._plugin_service) @@ -682,7 +689,7 @@ def connect( routing = \ next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None) - self._end_time_nano.set(perf_counter_ns()) + self._end_time_ns.set(perf_counter_ns()) if conn is None: conn = connect_func() @@ -691,12 +698,12 @@ def connect( return conn finally: - if self._start_time_nano.get() > 0: - self._end_time_nano.compare_and_set(0, perf_counter_ns()) + if self._start_time_ns.get() > 0: + self._end_time_ns.compare_and_set(0, perf_counter_ns()) def _reset_routing_time(self): - self._start_time_nano.set(0) - self._end_time_nano.set(0) + self._start_time_ns.set(0) + self._end_time_ns.set(0) def _open_direct_connection(self, connect_func: Callable, is_initial_connection: bool) -> Connection: conn = connect_func() @@ -732,7 +739,7 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: return execute_func() result: ValueContainer[Any] = ValueContainer.empty() - 
self._start_time_nano.set(perf_counter_ns()) + self._start_time_ns.set(perf_counter_ns()) while routing is not None and result is None: result = routing.apply( self, @@ -748,14 +755,14 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: routing = \ next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None) - self._end_time_nano.set(perf_counter_ns()) + self._end_time_ns.set(perf_counter_ns()) if result.is_present(): return result.get() return execute_func() finally: - if self._start_time_nano.get() > 0: - self._end_time_nano.compare_and_set(0, perf_counter_ns()) + if self._start_time_ns.get() > 0: + self._end_time_ns.compare_and_set(0, perf_counter_ns()) class BlueGreenPluginFactory(PluginFactory): @@ -763,9 +770,7 @@ def get_instance(self, plugin_service: PluginService, props: Properties) -> Plug return BlueGreenPlugin(plugin_service, props) -class BlueGreenInterimStatusProcessor(Protocol): - def process_interim_status(self, role: BlueGreenRole, interim_status: BlueGreenInterimStatus): - ... +BlueGreenInterimStatusProcessor = Callable[[BlueGreenRole, BlueGreenInterimStatus], None] class BlueGreenStatusMonitor: @@ -798,11 +803,11 @@ def __init__( self._should_collect_ip_addresses.set() self._should_collect_topology = Event() self._should_collect_topology.set() - self._use_ip_address = Event() + self.use_ip_address = Event() self._panic_mode = Event() self._panic_mode.set() - self._stop = Event() - self._interval_rate = BlueGreenIntervalRate.BASELINE + self.stop = Event() + self.interval_rate = BlueGreenIntervalRate.BASELINE self._host_list_provider: Optional[HostListProvider] = None self._start_topology: Tuple[HostInfo, ...] = () self._current_topology: Tuple[HostInfo, ...] = () @@ -832,12 +837,12 @@ def __init__( def _run(self): try: - while not self._stop.is_set(): + while not self.stop.is_set(): try: old_phase = self._current_phase self._open_connection() self._collect_status() - self._collect_topology() + self.collect_topology() self._collect_ip_addresses() self._update_ip_address_flags() @@ -845,7 +850,7 @@ def _run(self): logger.debug("BlueGreenStatusMonitor.StatusChanged", self._bg_role, self._current_phase) if self._interim_status_processor is not None: - self._interim_status_processor.process_interim_status( + self._interim_status_processor( self._bg_role, BlueGreenInterimStatus( self._current_phase, @@ -861,7 +866,7 @@ def _run(self): self._all_topology_changed) ) - interval_rate = BlueGreenIntervalRate.HIGH if self._panic_mode.is_set() else self._interval_rate + interval_rate = BlueGreenIntervalRate.HIGH if self._panic_mode.is_set() else self.interval_rate delay_ms = self._status_check_intervals_ms.get( interval_rate, BlueGreenStatusMonitor._DEFAULT_STATUS_CHECK_INTERVAL_MS) self._delay(delay_ms) @@ -899,7 +904,7 @@ def _open_connection_task(self): self._is_host_info_correct = False try: - if self._use_ip_address.is_set() and ip_address is not None: + if self.use_ip_address.is_set() and ip_address is not None: ip_host_info = copy(host_info) ip_host_info.host = ip_address props_copy = copy(self._props) @@ -1070,18 +1075,18 @@ def _is_connection_closed(self, conn: Optional[Connection]) -> bool: def _delay(self, delay_ms: int): start_ns = perf_counter_ns() end_ns = start_ns + delay_ms * 1_000_000 - initial_interval_rate = self._interval_rate + initial_interval_rate = self.interval_rate initial_panic_mode_val = self._panic_mode.is_set() min_delay_sec = min(delay_ms, 50) / 1_000 - while self._interval_rate == 
initial_interval_rate and \ + while self.interval_rate == initial_interval_rate and \ perf_counter_ns() < end_ns and \ - not self._stop.is_set() and \ + not self.stop.is_set() and \ initial_panic_mode_val == self._panic_mode.is_set(): with self._cv: self._cv.wait(min_delay_sec) - def _collect_topology(self): + def collect_topology(self): if self._host_list_provider is None: return @@ -1105,8 +1110,7 @@ def _collect_ip_addresses(self): if self._should_collect_ip_addresses: self._start_ip_addresses_by_host.clear() - for k, v in self._current_ip_addresses_by_host.items(): - self._start_ip_addresses_by_host.put_if_absent(k, v) + self._start_ip_addresses_by_host.put_all(self._current_ip_addresses_by_host) def _update_ip_address_flags(self): if self._should_collect_topology: @@ -1144,6 +1148,11 @@ def _update_ip_address_flags(self): start_topology_hosts and all(node.host not in start_topology_hosts for node in current_topology_copy)) + def reset_collected_data(self): + self._start_ip_addresses_by_host.clear() + self._start_topology = [] + self._host_names.clear() + @dataclass class BlueGreenDbStatusInfo: @@ -1155,7 +1164,717 @@ class BlueGreenDbStatusInfo: class BlueGreenStatusProvider: + _MONITORING_PROPERTY_PREFIX: ClassVar[str] = "blue-green-monitoring-" + _DEFAULT_CONNECT_TIMEOUT_MS: ClassVar[int] = 10_000 + _DEFAULT_SOCKET_TIMEOUT_MS: ClassVar[int] = 10_000 + def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str): self._plugin_service = plugin_service self._props = props self._bg_id = bg_id + + self._monitors: List[Optional[BlueGreenStatusMonitor]] = [None, None] + self._interim_status_hashes = [0, 0] + self._latest_context_hash = 0 + self._interim_statuses: List[Optional[BlueGreenInterimStatus]] = [None, None] + self._host_ip_addresses: ConcurrentDict[str, ValueContainer[str]] = ConcurrentDict() + # The second element of the Tuple is None when no corresponding node is found. 
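+ # Keyed by the blue endpoint, e.g. (hypothetical hosts) "myhost.cluster-xyz.rds.amazonaws.com" -> (blue HostInfo, green HostInfo for "myhost-green-1a2b3c.cluster-xyz.rds.amazonaws.com"); populated by _update_corresponding_nodes().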
+ self._corresponding_nodes: ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]] = ConcurrentDict() + # Keys are host URLs (port excluded) + self._roles_by_host: ConcurrentDict[str, BlueGreenRole] = ConcurrentDict() + self._iam_auth_success_hosts: ConcurrentDict[str, ConcurrentSet[str]] = ConcurrentDict() + self._green_node_name_change_times: ConcurrentDict[str, datetime] = ConcurrentDict() + self._summary_status: Optional[BlueGreenStatus] = None + self._latest_phase = BlueGreenPhase.NOT_CREATED + self._rollback = False + self._blue_dns_update_completed = False + self._green_dns_removed = False + self._green_topology_changed = False + self._all_green_nodes_changed_name = False + self._post_status_end_time_ns = 0 + self._process_status_lock = RLock() + self._status_check_intervals_ms: Dict[BlueGreenIntervalRate, int] = {} + self._phase_times_ns: ConcurrentDict[str, PhaseTimeInfo] = ConcurrentDict() + self._rds_utils = RdsUtils() + + self._switchover_timeout_ns = WrapperProperties.BG_SWITCHOVER_TIMEOUT_MS.get_int(props) * 1_000_000 + self._suspend_blue_connections_when_in_progress = ( + WrapperProperties.BG_SUSPEND_NEW_BLUE_CONNECTIONS.get_bool(props)) + self._status_check_intervals_ms.update({ + BlueGreenIntervalRate.BASELINE: WrapperProperties.BG_INTERVAL_BASELINE_MS.get_int(props), + BlueGreenIntervalRate.INCREASED: WrapperProperties.BG_INTERVAL_INCREASED_MS.get_int(props), + BlueGreenIntervalRate.HIGH: WrapperProperties.BG_INTERVAL_HIGH_MS.get_int(props) + }) + + dialect = self._plugin_service.database_dialect + if not isinstance(dialect, BlueGreenDialect): + # TODO: raise an error instead? Seems like we will encounter an error later if we don't raise one here. + logger.warning( + "BlueGreenStatusProvider.UnsupportedDialect", self._bg_id, dialect.__class__.__name__) + return + + current_host_info = self._plugin_service.current_host_info + if current_host_info is None: + logger.warning("BlueGreenStatusProvider.NoCurrentHostInfo", self._bg_id) + return + + self._monitors[BlueGreenRole.SOURCE.value] = BlueGreenStatusMonitor( + BlueGreenRole.SOURCE, + self._bg_id, + current_host_info, + self._plugin_service, + self._get_monitoring_props(), + self._status_check_intervals_ms, + self._process_interim_status) + self._monitors[BlueGreenRole.TARGET.value] = BlueGreenStatusMonitor( + BlueGreenRole.TARGET, + self._bg_id, + current_host_info, + self._plugin_service, + self._get_monitoring_props(), + self._status_check_intervals_ms, + self._process_interim_status) + + def _get_monitoring_props(self) -> Properties: + monitoring_props = copy(self._props) + for key in self._props.keys(): + if key.startswith(BlueGreenStatusProvider._MONITORING_PROPERTY_PREFIX): + new_key = key[len(BlueGreenStatusProvider._MONITORING_PROPERTY_PREFIX):] + monitoring_props[new_key] = self._props[key] + monitoring_props.pop(key, None) + + monitoring_props.put_if_absent( + WrapperProperties.CONNECT_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_CONNECT_TIMEOUT_MS / 1_000) + monitoring_props.put_if_absent( + WrapperProperties.SOCKET_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_SOCKET_TIMEOUT_MS / 1_000) + return monitoring_props + + def _process_interim_status(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): + with self._process_status_lock: + # TODO: don't need null check of interim_status in JDBC because interim_status is always not null + status_hash = interim_status.get_custom_hashcode() + context_hash = self._get_context_hash() + if self._interim_status_hashes[bg_role.value] == 
status_hash and self._latest_context_hash == context_hash: + # no changes detected + return + + logger.debug("BlueGreenStatusProvider.InterimStatus", self._bg_id, bg_role, interim_status) + self._update_phase(bg_role, interim_status) + + # Store interim_status and corresponding hash + self._interim_statuses[bg_role.value] = interim_status + self._interim_status_hashes[bg_role.value] = status_hash + self._latest_context_hash = context_hash + + # Update map of IP addresses. + self._host_ip_addresses.put_all(interim_status.start_ip_addresses_by_host_map) + + # Update roles_by_host based on the provided host names. + self._roles_by_host.put_all({host_name.lower(): bg_role for host_name in interim_status.host_names}) + + self._update_corresponding_nodes() + self._update_summary_status(bg_role, interim_status) + self._update_monitors() + self._update_status_cache() + self._log_current_context() + self._log_switchover_final_summary() + self._reset_context_when_completed() + + def _get_context_hash(self) -> int: + result = self._get_value_hash(1, str(self._all_green_nodes_changed_name)) + result = self._get_value_hash(result, str(len(self._iam_auth_success_hosts))) + return result + + def _get_value_hash(self, current_hash: int, val: str) -> int: + return current_hash * 31 + hash(val) + + def _update_phase(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): + role_status = self._interim_statuses[bg_role.value] + latest_phase = BlueGreenPhase.NOT_CREATED if role_status is None else role_status.phase + if latest_phase is not None and \ + interim_status.phase is not None and \ + interim_status.phase.value < latest_phase.value: + self._rollback = True + logger.debug("BlueGreenStatusProvider.Rollback", self._bg_id) + + if interim_status.phase is None: + return + + # The phase should not move backwards unless we're rolling back. + if self._rollback: + if interim_status.phase.value < self._latest_phase.value: + self._latest_phase = interim_status.phase + else: + if interim_status.phase.value >= self._latest_phase.value: + self._latest_phase = interim_status.phase + + def _update_corresponding_nodes(self): + """ + Update corresponding nodes. The blue writer node is mapped to the green writer node, and each blue reader node is + mapped to a green reader node. + """ + + self._corresponding_nodes.clear() + source_status = self._interim_statuses[BlueGreenRole.SOURCE.value] + target_status = self._interim_statuses[BlueGreenRole.TARGET.value] + if source_status is None or target_status is None: + return + + if source_status.start_topology and target_status.start_topology: + blue_writer_host_info = self._get_writer_host(BlueGreenRole.SOURCE) + green_writer_host_info = self._get_writer_host(BlueGreenRole.TARGET) + sorted_blue_readers = self._get_reader_hosts(BlueGreenRole.SOURCE) + sorted_green_readers = self._get_reader_hosts(BlueGreenRole.TARGET) + + if blue_writer_host_info is not None: + # green_writer_host_info may be None, but that will be handled properly by the corresponding routing. + self._corresponding_nodes.put( + blue_writer_host_info.host, (blue_writer_host_info, green_writer_host_info)) + + # TODO: port sorted blue reader length check to JDBC + if len(sorted_green_readers) > 0 and len(sorted_blue_readers) > 0: + # Map each blue reader to a green reader.
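+ # Round-robin with wrap-around, e.g. blue readers [b1, b2, b3] and green readers [g1, g2] produce the pairs (b1, g1), (b2, g2), (b3, g1).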
+ green_index = 0 + for blue_host_info in sorted_blue_readers: + self._corresponding_nodes.put( + blue_host_info.host, (blue_host_info, sorted_green_readers[green_index])) + green_index += 1 + # The modulo operation prevents us from exceeding the bounds of sorted_green_readers if there are + # more blue readers than green readers. In this case, multiple blue readers may be mapped to the + # same green reader. + green_index %= len(sorted_green_readers) + else: + # There are no green readers - map all blue reader nodes to the green writer. + for blue_host_info in sorted_blue_readers: + self._corresponding_nodes.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) + + if source_status.host_names and target_status.host_names: + blue_hosts = source_status.host_names + green_hosts = target_status.host_names + + # Map blue writer cluster host to green writer cluster host. + blue_cluster_host = next( + (blue_host for blue_host in blue_hosts if self._rds_utils.is_writer_cluster_dns(blue_host)), + None) + green_cluster_host = next( + (green_host for green_host in green_hosts if self._rds_utils.is_writer_cluster_dns(green_host)), + None) + if blue_cluster_host and green_cluster_host: + self._corresponding_nodes.put_if_absent( + blue_cluster_host, (HostInfo(host=blue_cluster_host), HostInfo(host=green_cluster_host))) + + # Map blue reader cluster host to green reader cluster host. + blue_reader_cluster_host = next( + (blue_host for blue_host in blue_hosts if self._rds_utils.is_reader_cluster_dns(blue_host)), + None) + green_reader_cluster_host = next( + (green_host for green_host in green_hosts if self._rds_utils.is_reader_cluster_dns(green_host)), + None) + if blue_reader_cluster_host and green_reader_cluster_host: + self._corresponding_nodes.put_if_absent( + blue_reader_cluster_host, + (HostInfo(host=blue_reader_cluster_host), HostInfo(host=green_reader_cluster_host))) + + # Map blue custom cluster hosts to green custom cluster hosts.
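+ # A blue custom endpoint is paired with the green custom endpoint whose cluster ID, once the "-green-xxxxxx" suffix is stripped, equals the blue cluster ID, e.g. (hypothetical) "sales.cluster-custom-xyz.us-east-1.rds.amazonaws.com" <-> "sales-green-1a2b3c.cluster-custom-xyz.us-east-1.rds.amazonaws.com".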
+ for blue_host in blue_hosts: + if not self._rds_utils.is_rds_custom_cluster_dns(blue_host): + continue + + custom_cluster_name = self._rds_utils.get_cluster_id(blue_host) + if not custom_cluster_name: + continue + + corresponding_green_host = next( + (green_host for green_host in green_hosts + if self._rds_utils.is_rds_custom_cluster_dns(green_host) + and custom_cluster_name == self._rds_utils.remove_green_instance_prefix( + self._rds_utils.get_cluster_id(green_host))), + None + ) + + if corresponding_green_host: + self._corresponding_nodes.put_if_absent( + blue_host, (HostInfo(blue_host), HostInfo(corresponding_green_host))) + + def _get_writer_host(self, bg_role: BlueGreenRole) -> Optional[HostInfo]: + role_status = self._interim_statuses[bg_role.value] + if role_status is None: + return None + + hosts = role_status.start_topology + return next((host for host in hosts if host.role == HostRole.WRITER), None) + + def _get_reader_hosts(self, bg_role: BlueGreenRole) -> List[HostInfo]: + role_status = self._interim_statuses[bg_role.value] + if role_status is None: + return [] + + hosts = role_status.start_topology + reader_hosts = [host for host in hosts if host.role != HostRole.WRITER] + reader_hosts.sort(key=lambda host_info: host_info.host) + return reader_hosts + + def _update_summary_status(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): + if self._latest_phase == BlueGreenPhase.NOT_CREATED: + self._summary_status = BlueGreenStatus(self._bg_id, BlueGreenPhase.NOT_CREATED) + + elif self._latest_phase == BlueGreenPhase.CREATED: + self._update_dns_flags(bg_role, interim_status) + self._summary_status = self._get_status_of_created() + + elif self._latest_phase == BlueGreenPhase.PREPARATION: + self._start_switchover_timer() + self._update_dns_flags(bg_role, interim_status) + self._summary_status = self._get_status_of_preparation() + + elif self._latest_phase == BlueGreenPhase.IN_PROGRESS: + self._update_dns_flags(bg_role, interim_status) + self._summary_status = self._get_status_of_in_progress() + + elif self._latest_phase == BlueGreenPhase.POST: + self._update_dns_flags(bg_role, interim_status) + self._summary_status = self._get_status_of_post() + + elif self._latest_phase == BlueGreenPhase.COMPLETED: + self._update_dns_flags(bg_role, interim_status) + self._summary_status = self._get_status_of_completed() + + else: + raise ValueError(Messages.get_formatted("bgd.unknownPhase", self._bg_id, self._latest_phase)) + + def _update_dns_flags(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): + if bg_role == BlueGreenRole.SOURCE and not self._blue_dns_update_completed and interim_status.all_start_topology_ip_changed: + logger.debug("bgd.blueDnsCompleted", self._bg_id) + self._blue_dns_update_completed = True + self._store_event_phase_time("Blue DNS updated") + + if bg_role == BlueGreenRole.TARGET and not self._green_dns_removed and interim_status.all_start_topology_endpoints_removed: + logger.debug("bgd.greenDnsRemoved", self._bg_id) + self._green_dns_removed = True + self._store_event_phase_time("Green DNS removed") + + if bg_role == BlueGreenRole.TARGET and not self._green_topology_changed and interim_status.all_topology_changed: + logger.debug("bgd.greenTopologyChanged", self._bg_id) + self._green_topology_changed = True + self._store_event_phase_time("Green topology changed") + + def _store_event_phase_time(self, key_prefix: str, phase: Optional[BlueGreenPhase] = None): + rollback_str = " (rollback)" if self._rollback else "" + key = 
f"{key_prefix}{rollback_str}" + self._phase_times_ns.put_if_absent(key, PhaseTimeInfo(datetime.now(), perf_counter_ns(), phase)) + + def _start_switchover_timer(self): + if self._post_status_end_time_ns == 0: + self._post_status_end_time_ns = perf_counter_ns() + self._switchover_timeout_ns + + def _get_status_of_created(self) -> BlueGreenStatus: + """ + New connect requests: go to blue or green nodes; default behaviour; no routing. + Existing connections: default behaviour; no action. + Execute JDBC calls: default behaviour; no action. + """ + return BlueGreenStatus( + self._bg_id, + BlueGreenPhase.CREATED, + [], + [], + self._roles_by_host, + self._corresponding_nodes + ) + + def _get_status_of_preparation(self): + """ + New connect requests to blue: route to corresponding IP address. + New connect requests to green: route to corresponding IP address. + New connect requests with IP address: default behaviour; no routing. + Existing connections: default behaviour; no action. + Execute JDBC calls: default behaviour; no action. + """ + + if self._is_switchover_timer_expired(): + logger.debug("BlueGreenStatusProvider.SwitchoverTimeout") + if self._rollback: + return self._get_status_of_created() + return self._get_status_of_completed() + + connect_routings = self._get_blue_ip_address_connect_routings() + return BlueGreenStatus( + self._bg_id, + BlueGreenPhase.PREPARATION, + connect_routings, + [], + self._roles_by_host, + self._corresponding_nodes + ) + + def _is_switchover_timer_expired(self) -> bool: + return 0 < self._post_status_end_time_ns < perf_counter_ns() + + def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: + connect_routings: List[ConnectRouting] = [] + for host, role in self._roles_by_host.items(): + if role == BlueGreenRole.TARGET or host not in self._corresponding_nodes.keys(): + continue + + node_pair = self._corresponding_nodes.get(host) + if node_pair is None: + # TODO: is continuing the right thing to do in this case? + # TODO: port to JDBC + continue + + blue_host_info = node_pair[0] + blue_ip = self._host_ip_addresses.get(blue_host_info.host) + if blue_ip is None or not blue_ip.is_present(): + blue_ip_host_info = blue_host_info + else: + blue_ip_host_info = copy(blue_host_info) + blue_host_info.host = blue_ip.get() + + host_routing = SubstituteConnectRouting(blue_ip_host_info, host, role, (blue_host_info,)) + host_and_port = self._get_host_and_port(host, self._interim_statuses[role.value].port) + host_and_port_routing = SubstituteConnectRouting(blue_ip_host_info, host_and_port, role, (blue_host_info,)) + connect_routings.extend([host_routing, host_and_port_routing]) + + return connect_routings + + def _get_host_and_port(self, host: str, port: int): + return f"{host}:{port}" if port > 0 else host + + def _get_status_of_in_progress(self) -> BlueGreenStatus: + """ + New connect requests to blue: suspend or route to corresponding IP address (depending on settings). + New connect requests to green: suspend. + New connect requests with IP address: suspend. + Existing connections: default behaviour; no action. + Execute JDBC calls: suspend. 
+ """ + + if self._is_switchover_timer_expired(): + logger.debug("BlueGreenStatusProvider.SwitchoverTimeout") + if self._rollback: + return self._get_status_of_created() + return self._get_status_of_completed() + + connect_routings: List[ConnectRouting] = [] + if self._suspend_blue_connections_when_in_progress: + connect_routings.append(SuspendConnectRouting(None, BlueGreenRole.SOURCE, self._bg_id)) + else: + # If we aren't suspending new blue connections, we should use IP addresses. + connect_routings.extend(self._get_blue_ip_address_connect_routings()) + + connect_routings.append(SuspendConnectRouting(None, BlueGreenRole.TARGET, self._bg_id)) + + # TODO: the code below is quite repetitive, see if we can refactor to clean things up + ip_addresses: Set[str] = {address_container.get() for address_container in self._host_ip_addresses.values() + if address_container.is_present()} + for ip_address in ip_addresses: + if self._suspend_blue_connections_when_in_progress: + # Check if the IP address belongs to one of the blue nodes. + interim_status = self._interim_statuses[BlueGreenRole.SOURCE.value] + if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): + host_connect_routing = SuspendConnectRouting(ip_address, None, self._bg_id) + host_and_port = self._get_host_and_port(ip_address, interim_status.port) + host_port_connect_routing = SuspendConnectRouting(host_and_port, None, self._bg_id) + connect_routings.extend([host_connect_routing, host_port_connect_routing]) + continue + + # Check if the IP address belongs to one of the green nodes. + interim_status = self._interim_statuses[BlueGreenRole.TARGET.value] + if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): + host_connect_routing = SuspendConnectRouting(ip_address, None, self._bg_id) + host_and_port = self._get_host_and_port(ip_address, interim_status.port) + host_port_connect_routing = SuspendConnectRouting(host_and_port, None, self._bg_id) + connect_routings.extend([host_connect_routing, host_port_connect_routing]) + continue + + # All blue and green traffic should be suspended. + execute_routings: List[ExecuteRouting] = [ + SuspendExecuteRouting(None, BlueGreenRole.SOURCE, self._bg_id), + SuspendExecuteRouting(None, BlueGreenRole.TARGET, self._bg_id)] + + # All traffic through connections with IP addresses that belong to blue or green nodes should be suspended. + for ip_address in ip_addresses: + # Check if the IP address belongs to one of the blue nodes. + interim_status = self._interim_statuses[BlueGreenRole.SOURCE.value] + if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): + host_execute_routing = SuspendExecuteRouting(ip_address, None, self._bg_id) + host_and_port = self._get_host_and_port(ip_address, interim_status.port) + host_port_execute_routing = SuspendExecuteRouting(host_and_port, None, self._bg_id) + execute_routings.extend([host_execute_routing, host_port_execute_routing]) + continue + + # Check if the IP address belongs to one of the green nodes. 
+ interim_status = self._interim_statuses[BlueGreenRole.TARGET.value] + if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): + host_execute_routing = SuspendExecuteRouting(ip_address, None, self._bg_id) + host_and_port = self._get_host_and_port(ip_address, interim_status.port) + host_port_execute_routing = SuspendExecuteRouting(host_and_port, None, self._bg_id) + execute_routings.extend([host_execute_routing, host_port_execute_routing]) + continue + + execute_routings.append(SuspendExecuteRouting(ip_address, None, self._bg_id)) + + return BlueGreenStatus( + self._bg_id, + BlueGreenPhase.IN_PROGRESS, + connect_routings, + execute_routings, + self._roles_by_host, + self._corresponding_nodes + ) + + def _interim_status_contains_ip_address(self, interim_status: BlueGreenInterimStatus, ip_address: str) -> bool: + for ip_address_container in interim_status.start_ip_addresses_by_host_map.values(): + if ip_address_container.is_present() and ip_address_container.get() == ip_address: + return True + + return False + + def _get_status_of_post(self) -> BlueGreenStatus: + if self._is_switchover_timer_expired(): + logger.debug("BlueGreenStatusProvider.SwitchoverTimeout") + if self._rollback: + return self._get_status_of_created() + return self._get_status_of_completed() + + return BlueGreenStatus( + self._bg_id, + BlueGreenPhase.POST, + self._get_post_status_connect_routings(), + [], + self._roles_by_host, + self._corresponding_nodes + ) + + def _get_post_status_connect_routings(self) -> List[ConnectRouting]: + if self._blue_dns_update_completed and self._all_green_nodes_changed_name: + return [] if self._green_dns_removed else [RejectConnectRouting(None, BlueGreenRole.TARGET)] + + routings: List[ConnectRouting] = [] + # New connect calls to blue nodes should be routed to green nodes + for host, role in self._roles_by_host.items(): + if role != BlueGreenRole.SOURCE or host not in self._corresponding_nodes.keys(): + continue + + blue_host = host + is_blue_host_instance = self._rds_utils.is_rds_instance(blue_host) + node_pair = self._corresponding_nodes.get(blue_host) + # TODO: port null check to JDBC + blue_host_info = None if node_pair is None else node_pair[0] + green_host_info = None if node_pair is None else node_pair[1] + + if green_host_info is None: + # The corresponding green node was not found. We need to suspend the connection request. + host_suspend_routing = SuspendUntilCorrespondingNodeFoundConnectRouting(blue_host, role, self._bg_id) + interim_status = self._interim_statuses[role.value] + host_and_port = self._get_host_and_port(blue_host, interim_status.port) + host_port_suspend_routing = ( + SuspendUntilCorrespondingNodeFoundConnectRouting(host_and_port, None, self._bg_id)) + routings.extend([host_suspend_routing, host_port_suspend_routing]) + else: + green_host = green_host_info.host + green_ip_container = self._host_ip_addresses.get(green_host) + if green_ip_container is None or not green_ip_container.is_present(): + green_ip_host_info = green_host_info + else: + green_ip_host_info = copy(green_host_info) + green_ip_host_info.host = green_ip_container.get() + + # Check whether the green host has already been connected a non-prefixed blue IAM host name. + if self._is_already_successfully_connected(green_host, blue_host): + # Green node has already changed its name, and it's not a new non-prefixed blue node. 
+                    # TODO: port to JDBC
+                    iam_hosts: Optional[Tuple[HostInfo, ...]] = None if blue_host_info is None else (blue_host_info,)
+                else:
+                    # The green node has not yet changed its name, so we need to try both possible IAM hosts.
+                    # TODO: port to JDBC
+                    iam_hosts = (green_host_info,) if blue_host_info is None else (green_host_info, blue_host_info)
+
+                # Bind green_host as a default argument so that each handler captures the host from its
+                # own loop iteration rather than the loop variable's final value.
+                iam_auth_success_handler = None if is_blue_host_instance \
+                    else lambda iam_host, green_host=green_host: self._register_iam_host(green_host, iam_host)
+                host_substitute_routing = SubstituteConnectRouting(
+                    green_ip_host_info, blue_host, role, iam_hosts, iam_auth_success_handler)
+                interim_status = self._interim_statuses[role.value]
+                host_and_port = self._get_host_and_port(blue_host, interim_status.port)
+                host_port_substitute_routing = SubstituteConnectRouting(
+                    green_ip_host_info, host_and_port, role, iam_hosts, iam_auth_success_handler)
+                routings.extend([host_substitute_routing, host_port_substitute_routing])
+
+        if not self._green_dns_removed:
+            routings.append(RejectConnectRouting(None, BlueGreenRole.TARGET))
+
+        return routings
+
+    def _is_already_successfully_connected(self, connect_host: str, iam_host: str) -> bool:
+        success_hosts = self._iam_auth_success_hosts.compute_if_absent(connect_host, lambda _: ConcurrentSet())
+        return success_hosts is not None and iam_host in success_hosts
+
+    def _register_iam_host(self, connect_host: str, iam_host: str):
+        success_hosts = self._iam_auth_success_hosts.compute_if_absent(connect_host, lambda _: ConcurrentSet())
+        if success_hosts is None:
+            success_hosts = ConcurrentSet()
+
+        if connect_host != iam_host and iam_host in success_hosts:
+            self._green_node_name_change_times.compute_if_absent(connect_host, lambda _: datetime.now())
+            logger.debug("BlueGreenStatusProvider.GreenNodeChangedName", connect_host, iam_host)
+
+        success_hosts.add(iam_host)
+        if connect_host != iam_host:
+            # Check whether all IAM hosts have changed their names.
+            all_hosts_changed_names = all(
+                any(registered_host != original_host for registered_host in registered_hosts)
+                for original_host, registered_hosts in self._iam_auth_success_hosts.items()
+                if registered_hosts  # Filter out empty sets
+            )
+
+            if all_hosts_changed_names and not self._all_green_nodes_changed_name:
+                logger.debug("BlueGreenStatusProvider.AllGreenNodesChangedName")
+                self._all_green_nodes_changed_name = True
+                self._store_event_phase_time("Green node certificates changed")
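The "all green nodes changed name" check above is dense. A worked illustration, using plain dicts and sets in place of the concurrent collections and hypothetical hostnames, shows when it flips to True:

    # Illustration only: a plain-dict stand-in for _iam_auth_success_hosts.
    # Keys are the hosts we connected to; values are the IAM host names that
    # have authenticated successfully over those connections.
    success_hosts_by_connect_host = {
        "green-1.xyz.rds.amazonaws.com": {"green-1.xyz.rds.amazonaws.com",
                                          "blue-1.xyz.rds.amazonaws.com"},  # renamed
        "green-2.xyz.rds.amazonaws.com": {"green-2.xyz.rds.amazonaws.com"},  # not yet
    }

    all_changed = all(
        any(registered != original for registered in registered_hosts)
        for original, registered_hosts in success_hosts_by_connect_host.items()
        if registered_hosts
    )

    # green-1 has authenticated under a name other than its own, but green-2
    # has not, so the name change is not yet considered complete.
    assert all_changed is False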
+    def _get_status_of_completed(self) -> BlueGreenStatus:
+        if self._is_switchover_timer_expired():
+            logger.debug("BlueGreenStatusProvider.SwitchoverTimeout")
+            if self._rollback:
+                return self._get_status_of_created()
+
+            return BlueGreenStatus(
+                self._bg_id, BlueGreenPhase.COMPLETED, [], [], self._roles_by_host, self._corresponding_nodes)
+
+        if not self._blue_dns_update_completed or not self._green_dns_removed:
+            return self._get_status_of_post()
+
+        return BlueGreenStatus(
+            self._bg_id, BlueGreenPhase.COMPLETED, [], [], self._roles_by_host, ConcurrentDict())
+
+    def _update_monitors(self):
+        phase = self._summary_status.phase
+        if phase == BlueGreenPhase.NOT_CREATED:
+            for monitor in self._monitors:
+                monitor.interval_rate = BlueGreenIntervalRate.BASELINE
+                monitor.collect_ip_address = False
+                monitor.collect_topology = False
+                monitor.use_ip_address = False
+        elif phase == BlueGreenPhase.CREATED:
+            for monitor in self._monitors:
+                monitor.interval_rate = BlueGreenIntervalRate.INCREASED
+                monitor.collect_ip_address = True
+                monitor.collect_topology = True
+                monitor.use_ip_address = False
+                if self._rollback:
+                    monitor.reset_collected_data()
+        elif phase == BlueGreenPhase.PREPARATION \
+                or phase == BlueGreenPhase.IN_PROGRESS \
+                or phase == BlueGreenPhase.POST:
+            for monitor in self._monitors:
+                monitor.interval_rate = BlueGreenIntervalRate.HIGH
+                monitor.collect_ip_address = False
+                monitor.collect_topology = False
+                monitor.use_ip_address = True
+        elif phase == BlueGreenPhase.COMPLETED:
+            for monitor in self._monitors:
+                monitor.interval_rate = BlueGreenIntervalRate.BASELINE
+                monitor.collect_ip_address = False
+                monitor.collect_topology = False
+                monitor.use_ip_address = False
+                monitor.reset_collected_data()
+
+            # Stop monitoring old1 cluster/instance.
+            if not self._rollback and self._monitors[BlueGreenRole.SOURCE.value] is not None:
+                self._monitors[BlueGreenRole.SOURCE.value].stop = True
+        else:
+            raise UnsupportedOperationError(
+                Messages.get_formatted(
+                    "BlueGreenStatusProvider.UnknownPhase", self._bg_id, self._summary_status.phase))
+
+    def _update_status_cache(self):
+        latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+        self._plugin_service.set_status(BlueGreenStatus, self._summary_status, self._bg_id)
+
+        if latest_status is not None:
+            # Notify all waiting threads that the status has been updated. The condition's lock
+            # must be held while calling notify_all().
+            with latest_status.cv:
+                latest_status.cv.notify_all()
+
+    def _log_current_context(self):
+        logger.debug(f"[bg_id: '{self._bg_id}'] Summary status: \n{self._summary_status}")
+        nodes_str = "\n".join(
+            f"   {blue_host} -> {node_pair[1] if node_pair else None}"
+            for blue_host, node_pair in self._corresponding_nodes.items())
+        logger.debug(f"Corresponding nodes:\n{nodes_str}")
+        phase_times = \
+            "\n".join(f"   {event_desc} -> {info.date_time}" for event_desc, info in self._phase_times_ns.items())
+        logger.debug(f"Phase times:\n{phase_times}")
+        change_name_times = \
+            "\n".join(f"   {host} -> {date_time}" for host, date_time in self._green_node_name_change_times.items())
+        logger.debug(f"Green node certificate change times:\n{change_name_times}")
+        logger.debug("\n"
+                     f"   latest_status_phase: {self._latest_phase}\n"
+                     f"   blue_dns_update_completed: {self._blue_dns_update_completed}\n"
+                     f"   green_dns_removed: {self._green_dns_removed}\n"
+                     f"   all_green_nodes_changed_name: {self._all_green_nodes_changed_name}\n"
+                     f"   green_topology_changed: {self._green_topology_changed}\n")
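For context, the notify_all() in _update_status_cache wakes threads that are blocked waiting for a fresh status. A minimal sketch of the waiting side, assuming only the cv Condition used above and the plugin service's get_status accessor; the actual suspend routings presumably block along these lines, but this is not part of the patch:

    import time

    def wait_for_new_status(plugin_service, current, bg_id, timeout_sec):
        # Block until a replacement BlueGreenStatus is published for bg_id
        # (the publisher stores the new status first, then calls notify_all()
        # on the old status's cv), or until the timeout elapses.
        deadline = time.monotonic() + timeout_sec
        with current.cv:
            latest = plugin_service.get_status(BlueGreenStatus, bg_id)
            while latest is current:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                current.cv.wait(remaining)
                latest = plugin_service.get_status(BlueGreenStatus, bg_id)
        return latest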
+    def _log_switchover_final_summary(self):
+        switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \
+            (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED)
+        has_active_switchover_phases = \
+            any(phase_info.phase is not None and phase_info.phase.is_switchover_active_or_completed
+                for phase_info in self._phase_times_ns.values())
+
+        if not switchover_completed or not has_active_switchover_phases:
+            return
+
+        # TODO: Key is not quite right, need to fix it
+        # TODO: port fix to JDBC
+        time_zero_phase = BlueGreenPhase.PREPARATION if self._rollback else BlueGreenPhase.IN_PROGRESS
+        time_zero = self._phase_times_ns.get(time_zero_phase.name)
+        sorted_phase_entries = sorted(self._phase_times_ns.items(), key=lambda entry: entry[1].timestamp_ns)
+        phase_time_lines = [
+            f"{entry[1].date_time:>28s} "
+            f"{'' if time_zero is None else format((entry[1].timestamp_ns - time_zero.timestamp_ns) / 1_000_000, '.0f'):>18s} ms "
+            f"{entry[0]:>31s}" for entry in sorted_phase_entries
+        ]
+        phase_times_str = "\n".join(phase_time_lines)
+        divider = "----------------------------------------------------------------------------------\n"
+        log_message = (f"[bg_id: '{self._bg_id}']\n{divider}"
+                       f"{'timestamp':<28s} {'time offset (ms)':>21s} {'event':>31s}\n{divider}"
+                       f"{phase_times_str}\n{divider}")
+        logger.debug(log_message)
+
+    def _reset_context_when_completed(self):
+        switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \
+            (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED)
+        has_active_switchover_phases = \
+            any(phase_info.phase is not None and phase_info.phase.is_switchover_active_or_completed
+                for phase_info in self._phase_times_ns.values())
+
+        if not switchover_completed or not has_active_switchover_phases:
+            return
+
+        logger.debug("BlueGreenStatusProvider.ResetContext")
+        self._rollback = False
+        self._summary_status = None
+        self._latest_phase = BlueGreenPhase.NOT_CREATED
+        self._phase_times_ns.clear()
+        self._blue_dns_update_completed = False
+        self._green_dns_removed = False
+        self._green_topology_changed = False
+        self._all_green_nodes_changed_name = False
+        self._post_status_end_time_ns = 0
+        self._interim_status_hashes = [0, 0]
+        self._latest_context_hash = 0
+        self._interim_statuses = [None, None]
+        self._host_ip_addresses.clear()
+        self._corresponding_nodes.clear()
+        self._roles_by_host.clear()
+        self._iam_auth_success_hosts.clear()
+        self._green_node_name_change_times.clear()
+
+
+@dataclass
+class PhaseTimeInfo:
+    date_time: datetime
+    timestamp_ns: int
+    phase: Optional[BlueGreenPhase]
diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
index 3ea84d02..dfb7033f 100644
--- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
+++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
@@ -67,6 +67,7 @@ BlueGreenStatusProvider.GreenDnsRemoved=[BlueGreenStatusProvider] [bgdId: '{}']
 BlueGreenStatusProvider.GreenNodeChangedName=[BlueGreenStatusProvider] Green node '{}' has changed its name to '{}'.
 BlueGreenStatusProvider.GreenTopologyChanged=[BlueGreenStatusProvider] [bgdId: '{}'] Green topology changed.
 BlueGreenStatusProvider.InterimStatus=[BlueGreenStatusProvider] [bgdId: '{}', role: {}] {}
+BlueGreenStatusProvider.NoCurrentHostInfo=[BlueGreenStatusProvider] [bgdId: '{}'] Unable to create Blue/Green monitors because information about the current host was not found.
 BlueGreenStatusProvider.ResetContext=[BlueGreenStatusProvider] Resetting context.
 BlueGreenStatusProvider.Rollback=[BlueGreenStatusProvider] [bgdId: '{}'] Blue/Green deployment is in rollback mode.
 BlueGreenStatusProvider.SwitchoverTimeout=[BlueGreenStatusProvider] Blue/Green switchover has timed out.
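The NoCurrentHostInfo entry added above is resolved through the wrapper's existing Messages helper; a quick sketch of how the key and its single '{}' placeholder are used (the "bg-123" deployment id is made up):

    from aws_advanced_python_wrapper.utils.messages import Messages

    text = Messages.get_formatted("BlueGreenStatusProvider.NoCurrentHostInfo", "bg-123")
    # -> "[BlueGreenStatusProvider] [bgdId: 'bg-123'] Unable to create Blue/Green
    #     monitors because information about the current host was not found."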
diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index be50dc1d..6e55a613 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -14,12 +14,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, Iterator, Set, Union, ValuesView if TYPE_CHECKING: from collections.abc import ItemsView -from threading import Lock +from threading import Lock, RLock from typing import Callable, Generic, KeysView, List, Optional, TypeVar K = TypeVar('K') @@ -63,6 +63,10 @@ def compute_if_absent(self, key: K, mapping_func: Callable) -> Optional[V]: return new_value return value + def put(self, key: K, value: V): + with self._lock: + self._dict[key] = value + def put_if_absent(self, key: K, new_value: V) -> V: with self._lock: existing_value = self._dict.get(key) @@ -71,6 +75,11 @@ def put_if_absent(self, key: K, new_value: V) -> V: return new_value return existing_value + def put_all(self, other_dict: Union[ConcurrentDict[K, V], Dict[K, V]]): + with self._lock: + for k, v in other_dict.items(): + self._dict[k] = v + def remove(self, key: K) -> V: with self._lock: return self._dict.pop(key, None) @@ -96,5 +105,34 @@ def apply_if(self, predicate: Callable, apply: Callable): def keys(self) -> KeysView: return self._dict.keys() + def values(self) -> ValuesView: + return self._dict.values() + def items(self) -> ItemsView: return self._dict.items() + + +class ConcurrentSet(Generic[V]): + def __init__(self): + self._set: Set[V] = set() + self._lock = RLock() + + def __len__(self): + with self._lock: + return len(self._set) + + def __contains__(self, item: V) -> bool: + with self._lock: + return item in self._set + + def __iter__(self) -> Iterator[V]: + with self._lock: + return iter(set(self._set)) + + def add(self, item: V): + with self._lock: + self._set.add(item) + + def remove(self, item: V): + with self._lock: + self._set.remove(item) diff --git a/aws_advanced_python_wrapper/utils/properties.py b/aws_advanced_python_wrapper/utils/properties.py index 2fe4edd6..42e7af2b 100644 --- a/aws_advanced_python_wrapper/utils/properties.py +++ b/aws_advanced_python_wrapper/utils/properties.py @@ -20,7 +20,9 @@ class Properties(Dict[str, Any]): - pass + def put_if_absent(self, key: str, value: Any): + if self.get(key) is None: + self[key] = value class WrapperProperty: diff --git a/tests/unit/test_blue_green_plugin.py b/tests/unit/test_blue_green_plugin.py deleted file mode 100644 index 478bdb8f..00000000 --- a/tests/unit/test_blue_green_plugin.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
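For reference, a short usage sketch for the ConcurrentDict additions above (put, put_all, values) and the new ConcurrentSet; the keys and values here are arbitrary:

    from aws_advanced_python_wrapper.utils.concurrent import ConcurrentDict, ConcurrentSet

    d: ConcurrentDict[str, int] = ConcurrentDict()
    d.put("a", 1)
    d.put_all({"b": 2, "c": 3})
    assert sorted(d.values()) == [1, 2, 3]

    s: ConcurrentSet[str] = ConcurrentSet()
    s.add("x")
    assert "x" in s and len(s) == 1
    s.remove("x")
    assert len(s) == 0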
-import time -from copy import deepcopy -from datetime import datetime -from types import MappingProxyType - -from aws_advanced_python_wrapper.blue_green_plugin import ( - BlueGreenInterimStatus, BlueGreenPhase, BlueGreenRole, BlueGreenStatus, - PassThroughConnectRouting, PassThroughExecuteRouting, - SubstituteConnectRouting) -from aws_advanced_python_wrapper.host_availability import HostAvailability -from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole -from aws_advanced_python_wrapper.utils.concurrent import ConcurrentDict -from aws_advanced_python_wrapper.utils.value_container import ValueContainer - - -# TODO: remove unnecessary tests -def test_status_str(): - connect_routing1 = PassThroughConnectRouting(None, BlueGreenRole.SOURCE) - connect_routing2 = PassThroughConnectRouting("localhost:5432", BlueGreenRole.SOURCE) - connect_routings = (connect_routing1, connect_routing2) - execute_routing1 = PassThroughExecuteRouting(None, BlueGreenRole.SOURCE) - execute_routing2 = PassThroughExecuteRouting("localhost:5432", BlueGreenRole.SOURCE) - execute_routings = (execute_routing1, execute_routing2) - - role_by_endpoint = MappingProxyType({"localhost-1": BlueGreenRole.SOURCE, "localhost-2": BlueGreenRole.TARGET}) - status = ( - BlueGreenStatus("asdf", BlueGreenPhase.PREPARATION, connect_routings, execute_routings, role_by_endpoint)) - print(f"\n{status}") - - -def test_interim_status_str(): - start_ips = ConcurrentDict() - start_ips.put_if_absent("instance-1", ValueContainer.of("1.1.1.1")) - start_ips.put_if_absent("instance-2", ValueContainer.empty()) - status = BlueGreenInterimStatus( - BlueGreenPhase.CREATED, - "1.0", - 5432, - (HostInfo("instance-1"), HostInfo("instance-2")), - start_ips, - (HostInfo("instance-1"), HostInfo("instance-2")), - start_ips, - {"instance-1", "instance-2"}, - True, - True, - False - ) - - print(f"\n{status}") - - -def test_substitute_connect_routing(): - example_host = HostInfo("instance-1sdfsaklfdjsaklfdjsaklfjslkdfjslkdfjsa", 5432, HostRole.WRITER, HostAvailability.AVAILABLE) - iam_hosts = (example_host, example_host, example_host) - routing = SubstituteConnectRouting( - "instance-1:5432", - BlueGreenRole.SOURCE, - example_host, - iam_hosts, - lambda host: None - ) - - print(f"\n{routing}") - - -def test_host_copy(): - h1 = HostInfo("localhost", 5432, HostRole.READER, HostAvailability.UNAVAILABLE, weight=5, host_id="localhost", last_update_time=datetime.now()) - h2 = deepcopy(h1) - assert h1 == h2 - - -def test_time(): - print(time.time()) From d0b1ee924151d50fb48a4accfa61e969ac0983be Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 12 Jun 2025 14:28:59 -0700 Subject: [PATCH 08/41] Cleanup, fix some TODOs --- .../blue_green_plugin.py | 103 ++++++++++-------- 1 file changed, 59 insertions(+), 44 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 34429d77..a91a2e83 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -682,10 +682,10 @@ def connect( while routing is not None and conn is None: conn = routing.apply(self, host_info, props, is_initial_connection, connect_func, self._plugin_service) if conn is None: - self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) - if self._bg_status is None: - # TODO: should we just continue in this case? 
-                    continue
+                latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+                if latest_status is not None:
+                    self._bg_status = latest_status
+
                 routing = \
                     next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None)
 
@@ -751,7 +751,10 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args:
                     *args, **kwargs)
 
                 if not result.is_present():
-                    self._bg_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+                    latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id)
+                    if latest_status is not None:
+                        self._bg_status = latest_status
+
                     routing = \
                         next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None)
 
@@ -1121,13 +1124,7 @@ def _update_ip_address_flags(self):
 
         if not self._should_collect_ip_addresses:
             # Check whether all hosts in start_topology resolve to new IP addresses
-            # TODO: do we need to make the value type equivalent to Java.Optional?
-            self._all_start_topology_ip_changed = bool(self._start_topology) and \
-                all(
-                    self._start_ip_addresses_by_host.get(node.host) is not None and
-                    self._current_ip_addresses_by_host.get(node.host) is not None and
-                    self._start_ip_addresses_by_host.get(node.host) != self._current_ip_addresses_by_host.get(node.host)
-                    for node in self._start_topology)
+            self._all_start_topology_ip_changed = self._has_all_start_topology_ip_changed()
 
         # Check whether all hosts in start_topology no longer have IP addresses. This indicates that the start_topology
         # hosts can no longer be resolved because their DNS entries no longer exist.
@@ -1148,6 +1145,22 @@ def _update_ip_address_flags(self):
             start_topology_hosts and all(node.host not in start_topology_hosts for node in current_topology_copy))
 
+    def _has_all_start_topology_ip_changed(self) -> bool:
+        if not self._start_topology:
+            return False
+
+        for host_info in self._start_topology:
+            start_ip = self._start_ip_addresses_by_host.get(host_info.host)
+            current_ip = self._current_ip_addresses_by_host.get(host_info.host)
+            if start_ip is None or not start_ip.is_present() or \
+                    current_ip is None or not current_ip.is_present():
+                return False
+
+            if start_ip.get() == current_ip.get():
+                return False
+
+        return True
+
     def reset_collected_data(self):
         self._start_ip_addresses_by_host.clear()
         self._start_topology = []
@@ -1215,6 +1228,7 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str)
 
         current_host_info = self._plugin_service.current_host_info
         if current_host_info is None:
+            # TODO: raise an error instead?
             logger.warning("BlueGreenStatusProvider.NoCurrentHostInfo", self._bg_id)
             return
 
@@ -1251,7 +1265,6 @@ def _get_monitoring_props(self) -> Properties:
 
     def _process_interim_status(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus):
         with self._process_status_lock:
-            # TODO: don't need null check of interim_status in JDBC because interim_status is always not null
             status_hash = interim_status.get_custom_hashcode()
             context_hash = self._get_context_hash()
             if self._interim_status_hashes[bg_role.value] == status_hash and self._latest_context_hash == context_hash:
@@ -1331,22 +1344,24 @@ def _update_corresponding_nodes(self):
                 self._corresponding_nodes.put(
                     blue_writer_host_info.host, (blue_writer_host_info, green_writer_host_info))
 
-            # TODO: port sorted blue reader length check to JDBC
-            if len(sorted_green_readers) > 0 and len(sorted_blue_readers) > 0:
-                # Map each to blue reader to a green reader.
- green_index = 0 - for blue_host_info in sorted_blue_readers: - self._corresponding_nodes.put( - blue_host_info.host, (blue_host_info, sorted_green_readers[green_index])) - green_index += 1 - # The modulo operation prevents us from exceeding the bounds of sorted_green_readers if there are - # more blue readers than green readers. In this case, multiple blue readers may be mapped to the - # same green reader. - green_index %= len(sorted_green_readers) - else: - # There's no green readers - map all blue reader nodes to the green writer - for blue_host_info in sorted_blue_readers: - self._corresponding_nodes.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) + if sorted_blue_readers: + # Map blue readers to green nodes + if sorted_green_readers: + # Map each to blue reader to a green reader. + green_index = 0 + for blue_host_info in sorted_blue_readers: + self._corresponding_nodes.put( + blue_host_info.host, (blue_host_info, sorted_green_readers[green_index])) + green_index += 1 + # The modulo operation prevents us from exceeding the bounds of sorted_green_readers if there are + # more blue readers than green readers. In this case, multiple blue readers may be mapped to the + # same green reader. + green_index %= len(sorted_green_readers) + else: + # There's no green readers - map all blue reader nodes to the green writer + for blue_host_info in sorted_blue_readers: + self._corresponding_nodes.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) + if source_status.host_names and target_status.host_names: blue_hosts = source_status.host_names @@ -1404,7 +1419,7 @@ def _get_writer_host(self, bg_role: BlueGreenRole) -> Optional[HostInfo]: hosts = role_status.start_topology return next((host for host in hosts if host.role == HostRole.WRITER), None) - def _get_reader_hosts(self, bg_role: BlueGreenRole) -> List[HostInfo]: + def _get_reader_hosts(self, bg_role: BlueGreenRole) -> Optional[List[HostInfo]]: role_status = self._interim_statuses[bg_role.value] if role_status is None: return [] @@ -1513,13 +1528,8 @@ def _is_switchover_timer_expired(self) -> bool: def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: connect_routings: List[ConnectRouting] = [] for host, role in self._roles_by_host.items(): - if role == BlueGreenRole.TARGET or host not in self._corresponding_nodes.keys(): - continue - node_pair = self._corresponding_nodes.get(host) - if node_pair is None: - # TODO: is continuing the right thing to do in this case? 
- # TODO: port to JDBC + if role == BlueGreenRole.TARGET or node_pair is None: continue blue_host_info = node_pair[0] @@ -1531,7 +1541,11 @@ def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: blue_host_info.host = blue_ip.get() host_routing = SubstituteConnectRouting(blue_ip_host_info, host, role, (blue_host_info,)) - host_and_port = self._get_host_and_port(host, self._interim_statuses[role.value].port) + interim_status = self._interim_statuses[role.value] + if interim_status is None: + continue + + host_and_port = self._get_host_and_port(host, interim_status.port) host_and_port_routing = SubstituteConnectRouting(blue_ip_host_info, host_and_port, role, (blue_host_info,)) connect_routings.extend([host_routing, host_and_port_routing]) @@ -1564,7 +1578,6 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: connect_routings.append(SuspendConnectRouting(None, BlueGreenRole.TARGET, self._bg_id)) - # TODO: the code below is quite repetitive, see if we can refactor to clean things up ip_addresses: Set[str] = {address_container.get() for address_container in self._host_ip_addresses.values() if address_container.is_present()} for ip_address in ip_addresses: @@ -1659,7 +1672,6 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: blue_host = host is_blue_host_instance = self._rds_utils.is_rds_instance(blue_host) node_pair = self._corresponding_nodes.get(blue_host) - # TODO: port null check to JDBC blue_host_info = None if node_pair is None else node_pair[0] green_host_info = None if node_pair is None else node_pair[1] @@ -1667,6 +1679,9 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: # The corresponding green node was not found. We need to suspend the connection request. host_suspend_routing = SuspendUntilCorrespondingNodeFoundConnectRouting(blue_host, role, self._bg_id) interim_status = self._interim_statuses[role.value] + if interim_status is None: + continue + host_and_port = self._get_host_and_port(blue_host, interim_status.port) host_port_suspend_routing = ( SuspendUntilCorrespondingNodeFoundConnectRouting(host_and_port, None, self._bg_id)) @@ -1683,11 +1698,9 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: # Check whether the green host has already been connected a non-prefixed blue IAM host name. if self._is_already_successfully_connected(green_host, blue_host): # Green node has already changed its name, and it's not a new non-prefixed blue node. - # TODO: port to JDBC iam_hosts: Optional[Tuple[HostInfo, ...]] = None if blue_host_info is None else (blue_host_info,) else: # The green node has not yet changed ist name, so we need to try both possible IAM hosts. 
- # TODO: port to JDBC iam_hosts = (green_host_info,) if blue_host_info is None else (green_host_info, blue_host_info) iam_auth_success_handler = None if is_blue_host_instance \ @@ -1695,6 +1708,9 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: host_substitute_routing = SubstituteConnectRouting( green_ip_host_info, blue_host, role, iam_hosts, iam_auth_success_handler) interim_status = self._interim_statuses[role.value] + if interim_status is None: + continue + host_and_port = self._get_host_and_port(blue_host, interim_status.port) host_port_substitute_routing = SubstituteConnectRouting( green_ip_host_info, host_and_port, role, iam_hosts, iam_auth_success_handler) @@ -1826,10 +1842,9 @@ def _log_switchover_final_summary(self): if not switchover_completed or not has_active_switchover_phases: return - # TODO: Key is not quite right, need to fix it - # TODO: port fix to JDBC time_zero_phase = BlueGreenPhase.PREPARATION if self._rollback else BlueGreenPhase.IN_PROGRESS - time_zero = self._phase_times_ns.get(time_zero_phase.name) + time_zero_key = f"{time_zero_phase.name} (rollback)" if self._rollback else time_zero_phase.name + time_zero = self._phase_times_ns.get(time_zero_key) sorted_phase_entries = sorted(self._phase_times_ns.items(), key=lambda entry: entry[1].timestamp_ns) phase_time_lines = [ f"{entry[1].date_time:>28s} " From 6c94ba7c883dca1dc9da9775de8eb76055e7d84b Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 13 Jun 2025 10:05:25 -0700 Subject: [PATCH 09/41] Fix .flake8 formatting --- .flake8 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.flake8 b/.flake8 index 847900aa..26b87807 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,4 @@ [flake8] max-line-length = 150 extend-select = TC, TC1 -exclude = - venv/, +exclude = venv/ From 4319ef2147c8da343a337248819edd43a9b1d4bc Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 13 Jun 2025 10:19:50 -0700 Subject: [PATCH 10/41] Attempt to fix build --- aws_advanced_python_wrapper/utils/concurrent.py | 2 +- tests/unit/test_hostinfo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index be50dc1d..244f8236 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -17,7 +17,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from collections.abc import ItemsView + from typing import ItemsView from threading import Lock from typing import Callable, Generic, KeysView, List, Optional, TypeVar diff --git a/tests/unit/test_hostinfo.py b/tests/unit/test_hostinfo.py index 29249ad9..ad8df033 100644 --- a/tests/unit/test_hostinfo.py +++ b/tests/unit/test_hostinfo.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from collections.abc import Hashable +from typing import Hashable import pytest From 2c8262bb7a2579f7d933e81118753fa7f3e48451 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 13 Jun 2025 15:48:40 -0700 Subject: [PATCH 11/41] Port over integration/host code from JDBC --- docs/development-guide/IntegrationTests.md | 6 +- tests/integration/container/conftest.py | 2 +- .../container/utils/rds_test_utility.py | 10 +- .../container/utils/test_environment.py | 2 +- .../container/utils/test_environment_info.py | 14 +- tests/integration/host/build.gradle.kts | 76 +- .../integration/DatabaseEngineDeployment.java | 3 +- .../test/java/integration/DriverHelper.java | 164 +-- .../integration/TestEnvironmentFeatures.java | 5 +- .../java/integration/TestEnvironmentInfo.java | 41 +- .../java/integration/TestInstanceInfo.java | 6 +- .../integration/host/TestEnvironment.java | 866 ++++++++++--- .../host/TestEnvironmentConfiguration.java | 33 +- .../host/TestEnvironmentProvider.java | 93 +- .../integration/util/AuroraTestUtility.java | 1135 +++++++++++++---- .../integration/util/ContainerHelper.java | 44 +- 16 files changed, 1791 insertions(+), 709 deletions(-) diff --git a/docs/development-guide/IntegrationTests.md b/docs/development-guide/IntegrationTests.md index 5dca571c..3c8b8f61 100644 --- a/docs/development-guide/IntegrationTests.md +++ b/docs/development-guide/IntegrationTests.md @@ -120,13 +120,13 @@ unset FILTER # Done testing the IAM tests, unset FILTER | `DB_USER` | Yes | The username to access the database. | `admin` | | `DB_PASSWORD` | Yes | The database cluster password. | `password` | | `DB_DATABASE_NAME` | No | Name of the database that will be used by the tests. The default database name is test. | `test_db_name` | -| `RDS_CLUSTER_NAME` | Yes | The database identifier for your Aurora or RDS cluster. Must be a unique value to avoid conflicting with existing clusters. | `db-identifier` | -| `RDS_CLUSTER_DOMAIN` | No | The existing database connection suffix. Use this variable to run against an existing database. | `XYZ.us-east-2.rds.amazonaws.com` | +| `RDS_DB_NAME` | Yes | The database identifier for your Aurora or RDS cluster. Must be a unique value to avoid conflicting with existing clusters. | `db-identifier` | +| `RDS_DB_DOMAIN` | No | The existing database connection suffix. Use this variable to run against an existing database. | `XYZ.us-east-2.rds.amazonaws.com` | | `IAM_USER` | No | User within the database that is identified with AWSAuthenticationPlugin. This is used for AWS IAM Authentication and is optional | `example_user_name` | | `AWS_ACCESS_KEY_ID` | Yes | An AWS access key associated with an IAM user or role with RDS permissions. | `ASIAIOSFODNN7EXAMPLE` | | `AWS_SECRET_ACCESS_KEY` | Yes | The secret key associated with the provided AWS_ACCESS_KEY_ID. | `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY` | | `AWS_SESSION_TOKEN` | No | AWS Session Token for CLI, SDK, & API access. This value is for MFA credentials only. See: [temporary AWS credentials](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_use-resources.html). | `AQoDYXdzEJr...` | | -| `REUSE_RDS_CLUSTER` | Yes | Set to true if you would like to use an existing cluster for your tests. | `false` | +| `REUSE_RDS_DB` | Yes | Set to true if you would like to use an existing cluster for your tests. | `false` | | `RDS_DB_REGION` | Yes | The database region. 
| `us-east-2` | | `DEBUG_ENV` | No | The IDE you will be using to debug the tests, values are either `PYCHARM` or `VSCODE` | `PYCHARM` | diff --git a/tests/integration/container/conftest.py b/tests/integration/container/conftest.py index eefabcaf..8bf68ece 100644 --- a/tests/integration/container/conftest.py +++ b/tests/integration/container/conftest.py @@ -86,7 +86,7 @@ def pytest_runtest_setup(item): deployment = request.get_database_engine_deployment() if DatabaseEngineDeployment.AURORA == deployment or DatabaseEngineDeployment.RDS_MULTI_AZ == deployment: rds_utility = RdsTestUtility(info.get_region(), info.get_rds_endpoint()) - rds_utility.wait_until_cluster_has_desired_status(info.get_cluster_name(), "available") + rds_utility.wait_until_cluster_has_desired_status(info.get_db_name(), "available") # Need to ensure that cluster details through API matches topology fetched through SQL # Wait up to 5min diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index 27d48aab..e6f0d320 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -78,7 +78,7 @@ def create_db_instance(self, instance_id: str) -> TestInstanceInfo: self.delete_db_instance(instance_id) self._client.create_db_instance( - DBClusterIdentifier=environment.get_info().get_cluster_name(), + DBClusterIdentifier=environment.get_info().get_db_name(), DBInstanceIdentifier=instance_id, DBInstanceClass="db.r5.large", Engine=self.get_aurora_engine_name(environment.get_engine()), @@ -138,7 +138,7 @@ def failover_cluster_and_wait_until_writer_changed( start = perf_counter_ns() if cluster_id is None: - cluster_id = TestEnvironment.get_current().get_info().get_cluster_name() + cluster_id = TestEnvironment.get_current().get_info().get_db_name() if initial_writer_id is None: initial_writer_id = self.get_cluster_writer_instance_id(cluster_id) @@ -170,7 +170,7 @@ def failover_cluster_and_wait_until_writer_changed( def failover_cluster(self, cluster_id: Optional[str] = None, target_id: Optional[str] = None) -> None: if cluster_id is None: - cluster_id = TestEnvironment.get_current().get_info().get_cluster_name() + cluster_id = TestEnvironment.get_current().get_info().get_db_name() self.wait_until_cluster_has_desired_status(cluster_id, "available") @@ -268,7 +268,7 @@ def _query_multi_az_instance_id(self, conn: Connection, engine: DatabaseEngine): def is_db_instance_writer(self, instance_id: str, cluster_id: Optional[str] = None) -> bool: if cluster_id is None: - cluster_id = TestEnvironment.get_current().get_info().get_cluster_name() + cluster_id = TestEnvironment.get_current().get_info().get_db_name() cluster_info = self.get_db_cluster(cluster_id) members = cluster_info.get("DBClusterMembers") for m in members: @@ -278,7 +278,7 @@ def is_db_instance_writer(self, instance_id: str, cluster_id: Optional[str] = No def get_cluster_writer_instance_id(self, cluster_id: Optional[str] = None) -> str: if cluster_id is None: - cluster_id = TestEnvironment.get_current().get_info().get_cluster_name() + cluster_id = TestEnvironment.get_current().get_info().get_db_name() cluster_info = self.get_db_cluster(cluster_id) members = cluster_info.get("DBClusterMembers") for m in members: diff --git a/tests/integration/container/utils/test_environment.py b/tests/integration/container/utils/test_environment.py index c1c9d749..e547dba2 100644 --- a/tests/integration/container/utils/test_environment.py +++ 
b/tests/integration/container/utils/test_environment.py @@ -203,7 +203,7 @@ def get_writer(self) -> TestInstanceInfo: return self.get_instances()[0] def get_cluster_name(self) -> str: - return self.get_info().get_cluster_name() + return self.get_info().get_db_name() def get_proxy_database_info(self) -> TestProxyDatabaseInfo: return self.get_info().get_proxy_database_info() diff --git a/tests/integration/container/utils/test_environment_info.py b/tests/integration/container/utils/test_environment_info.py index d1f5966c..31c2500b 100644 --- a/tests/integration/container/utils/test_environment_info.py +++ b/tests/integration/container/utils/test_environment_info.py @@ -30,8 +30,11 @@ class TestEnvironmentInfo: _aws_session_token: str _region: str _rds_endpoint: str - _cluster_name: str + _db_name: str _iam_user_name: str + _bg_deployment_id: str + _cluster_parameter_group: str + _random_base: str _database_info: TestDatabaseInfo _proxy_database_info: TestProxyDatabaseInfo _traces_telemetry_info: TestTelemetryInfo @@ -50,8 +53,11 @@ def __init__(self, test_info: Dict[str, Any]) -> None: self._aws_session_token = typing.cast('str', test_info.get("awsSessionToken")) self._region = typing.cast('str', test_info.get("region")) self._rds_endpoint = typing.cast('str', test_info.get("rdsEndpoint")) - self._cluster_name = typing.cast('str', test_info.get("clusterName")) + self._db_name = typing.cast('str', test_info.get("rdsDbName")) self._iam_user_name = typing.cast('str', test_info.get("iamUsername")) + self._bg_deployment_id = typing.cast('str', test_info.get("blueGreenDeploymentId")) + self._cluster_parameter_group = typing.cast('str', test_info.get("clusterParameterGroupName")) + self._random_base = typing.cast('str', test_info.get("randomBase")) database_info_dict: Dict[str, Any] = typing.cast('Dict[str, Any]', test_info.get("databaseInfo")) if database_info_dict is not None: @@ -95,8 +101,8 @@ def get_region(self) -> str: def get_rds_endpoint(self) -> str: return self._rds_endpoint - def get_cluster_name(self) -> str: - return self._cluster_name + def get_db_name(self) -> str: + return self._db_name def get_iam_user_name(self) -> str: return self._iam_user_name diff --git a/tests/integration/host/build.gradle.kts b/tests/integration/host/build.gradle.kts index 09c79a4d..373e9428 100644 --- a/tests/integration/host/build.gradle.kts +++ b/tests/integration/host/build.gradle.kts @@ -68,7 +68,9 @@ tasks.register("test-python-3.11-mysql") { doFirst { systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") } @@ -80,7 +82,9 @@ tasks.register("test-python-3.8-mysql") { doFirst { systemProperty("exclude-performance", "true") systemProperty("exclude-python-311", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") } @@ -92,7 +96,9 @@ tasks.register("test-python-3.11-pg") { doFirst { systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + 
systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") systemProperty("exclude-mariadb-driver", "true") @@ -106,7 +112,9 @@ tasks.register("test-python-3.8-pg") { doFirst { systemProperty("exclude-performance", "true") systemProperty("exclude-python-311", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") systemProperty("exclude-mariadb-driver", "true") @@ -119,7 +127,9 @@ tasks.register("test-docker") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-aurora", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") } @@ -130,7 +140,9 @@ tasks.register("test-aurora") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") } @@ -141,7 +153,9 @@ tasks.register("test-pg-aurora") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") @@ -155,7 +169,9 @@ tasks.register("test-mysql-aurora") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") @@ -170,6 +186,7 @@ tasks.register("test-multi-az") { systemProperty("exclude-performance", "true") systemProperty("exclude-aurora", "true") systemProperty("exclude-python-38", "true") + systemProperty("exclude-bg", "true") } } @@ -184,6 +201,7 @@ tasks.register("test-pg-multi-az") { systemProperty("exclude-mysql-engine", "true") systemProperty("exclude-mariadb-driver", "true") systemProperty("exclude-mariadb-engine", "true") + systemProperty("exclude-bg", "true") } } @@ -196,6 +214,7 @@ tasks.register("test-mysql-multi-az") { systemProperty("exclude-aurora", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-bg", "true") } } @@ -208,6 +227,7 @@ tasks.register("test-autoscaling") { systemProperty("exclude-performance", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") + 
systemProperty("exclude-bg", "true") } } @@ -216,7 +236,9 @@ tasks.register("test-pg-aurora-performance") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-iam", "true") systemProperty("exclude-secrets-manager", "true") systemProperty("exclude-mysql-driver", "true") @@ -231,7 +253,9 @@ tasks.register("test-mysql-aurora-performance") { filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-iam", "true") systemProperty("exclude-secrets-manager", "true") systemProperty("exclude-pg-driver", "true") @@ -247,6 +271,7 @@ tasks.register("debug-all-environments") { doFirst { systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") + systemProperty("exclude-bg", "true") } } @@ -255,7 +280,9 @@ tasks.register("debug-docker") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-aurora", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") } @@ -266,7 +293,9 @@ tasks.register("debug-aurora") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") } @@ -277,7 +306,9 @@ tasks.register("debug-pg-aurora") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") @@ -289,7 +320,9 @@ tasks.register("debug-mysql-aurora") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") @@ -302,7 +335,9 @@ tasks.register("debug-autoscaling") { doFirst { systemProperty("test-autoscaling", "true") systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") 
systemProperty("exclude-performance", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") @@ -314,7 +349,9 @@ tasks.register("debug-pg-aurora-performance") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-iam", "true") systemProperty("exclude-secrets-manager", "true") systemProperty("exclude-mysql-driver", "true") @@ -329,7 +366,9 @@ tasks.register("debug-mysql-aurora-performance") { filter.includeTestsMatching("integration.host.TestRunner.debugTests") doFirst { systemProperty("exclude-docker", "true") - systemProperty("exclude-multi-az", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-bg", "true") systemProperty("exclude-iam", "true") systemProperty("exclude-secrets-manager", "true") systemProperty("exclude-pg-driver", "true") @@ -345,6 +384,7 @@ tasks.register("debug-multi-az") { systemProperty("exclude-aurora", "true") systemProperty("exclude-performance", "true") systemProperty("exclude-python-38", "true") + systemProperty("exclude-bg", "true") } } @@ -357,6 +397,7 @@ tasks.register("debug-pg-multi-az") { systemProperty("exclude-performance", "true") systemProperty("exclude-mysql-driver", "true") systemProperty("exclude-mysql-engine", "true") + systemProperty("exclude-bg", "true") } } @@ -369,5 +410,6 @@ tasks.register("debug-mysql-multi-az") { systemProperty("exclude-performance", "true") systemProperty("exclude-pg-driver", "true") systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-bg", "true") } } diff --git a/tests/integration/host/src/test/java/integration/DatabaseEngineDeployment.java b/tests/integration/host/src/test/java/integration/DatabaseEngineDeployment.java index 0126e0f2..d7273e30 100644 --- a/tests/integration/host/src/test/java/integration/DatabaseEngineDeployment.java +++ b/tests/integration/host/src/test/java/integration/DatabaseEngineDeployment.java @@ -19,6 +19,7 @@ public enum DatabaseEngineDeployment { DOCKER, RDS, - RDS_MULTI_AZ, + RDS_MULTI_AZ_CLUSTER, + RDS_MULTI_AZ_INSTANCE, AURORA } diff --git a/tests/integration/host/src/test/java/integration/DriverHelper.java b/tests/integration/host/src/test/java/integration/DriverHelper.java index a06d0687..b4c4f679 100644 --- a/tests/integration/host/src/test/java/integration/DriverHelper.java +++ b/tests/integration/host/src/test/java/integration/DriverHelper.java @@ -16,18 +16,10 @@ package integration; -import com.mysql.cj.conf.PropertyKey; import java.sql.Connection; -import java.sql.Driver; import java.sql.DriverManager; import java.sql.SQLException; -import java.util.Collections; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; import java.util.logging.Logger; -import org.postgresql.PGProperty; import org.testcontainers.shaded.org.apache.commons.lang3.NotImplementedException; public class DriverHelper { @@ -45,51 +37,6 @@ public static String getDriverProtocol(DatabaseEngine databaseEngine) { } } - public static Connection getDriverConnection(TestEnvironmentInfo info) throws SQLException { - final String url = - String.format( - "%s%s:%d/%s", - 
DriverHelper.getDriverProtocol(info.getRequest().getDatabaseEngine()), - info.getDatabaseInfo().getClusterEndpoint(), - info.getDatabaseInfo().getClusterEndpointPort(), - info.getDatabaseInfo().getDefaultDbName()); - return DriverManager.getConnection(url, info.getDatabaseInfo().getUsername(), info.getDatabaseInfo().getPassword()); - } - - public static String getDriverProtocol(DatabaseEngine databaseEngine, TestDriver testDriver) { - switch (testDriver) { - case MYSQL: - return "jdbc:mysql://"; - case PG: - return "jdbc:postgresql://"; - default: - throw new NotImplementedException(testDriver.toString()); - } - } - - public static void registerDriver(DatabaseEngine engine) { - try { - Class.forName(DriverHelper.getDriverClassname(engine)); - } catch (ClassNotFoundException e) { - throw new RuntimeException( - "Driver not found: " - + DriverHelper.getDriverClassname(engine), - e); - } - } - - public static String getWrapperDriverProtocol( - DatabaseEngine databaseEngine, TestDriver testDriver) { - switch (testDriver) { - case MYSQL: - return "jdbc:aws-wrapper:mysql://"; - case PG: - return "jdbc:aws-wrapper:postgresql://"; - default: - throw new NotImplementedException(testDriver.toString()); - } - } - public static String getDriverClassname(DatabaseEngine databaseEngine) { switch (databaseEngine) { case MYSQL: @@ -112,95 +59,40 @@ public static String getDriverClassname(TestDriver testDriver) { } } - public static String getHostnameSql(DatabaseEngine databaseEngine) { - switch (databaseEngine) { - case MYSQL: - return "SELECT @@hostname"; - case PG: - return "SELECT inet_server_addr()"; - default: - throw new NotImplementedException(databaseEngine.toString()); - } - } - - public static void setConnectTimeout( - TestDriver testDriver, Properties props, long timeout, TimeUnit timeUnit) { - switch (testDriver) { - case MYSQL: - props.setProperty( - PropertyKey.connectTimeout.getKeyName(), String.valueOf(timeUnit.toMillis(timeout))); - break; - case PG: - props.setProperty( - PGProperty.CONNECT_TIMEOUT.getName(), String.valueOf(timeUnit.toSeconds(timeout))); - break; - default: - throw new NotImplementedException(testDriver.toString()); - } - } - - public static void setSocketTimeout( - TestDriver testDriver, Properties props, long timeout, TimeUnit timeUnit) { - switch (testDriver) { - case MYSQL: - props.setProperty( - PropertyKey.socketTimeout.getKeyName(), String.valueOf(timeUnit.toMillis(timeout))); - break; - case PG: - props.setProperty( - PGProperty.SOCKET_TIMEOUT.getName(), String.valueOf(timeUnit.toSeconds(timeout))); - break; - default: - throw new NotImplementedException(testDriver.toString()); - } - } - - public static void setTcpKeepAlive(TestDriver testDriver, Properties props, boolean enabled) { - switch (testDriver) { - case MYSQL: - props.setProperty(PropertyKey.tcpKeepAlive.getKeyName(), String.valueOf(enabled)); - break; - case PG: - props.setProperty(PGProperty.TCP_KEEP_ALIVE.getName(), String.valueOf(enabled)); - break; - default: - throw new NotImplementedException(testDriver.toString()); - } - } - - public static void setMonitoringConnectTimeout( - TestDriver testDriver, Properties props, long timeout, TimeUnit timeUnit) { - switch (testDriver) { - case MYSQL: - props.setProperty( - "monitoring-" + PropertyKey.connectTimeout.getKeyName(), - String.valueOf(timeUnit.toMillis(timeout))); - break; - case PG: - props.setProperty( - "monitoring-" + PGProperty.CONNECT_TIMEOUT.getName(), - String.valueOf(timeUnit.toSeconds(timeout))); - break; - default: - throw new 
NotImplementedException(testDriver.toString()); + public static void registerDriver(DatabaseEngine engine) { + try { + Class.forName(DriverHelper.getDriverClassname(engine)); + } catch (ClassNotFoundException e) { + throw new RuntimeException( + "Driver not found: " + + DriverHelper.getDriverClassname(engine), + e); } } - public static void setMonitoringSocketTimeout( - TestDriver testDriver, Properties props, long timeout, TimeUnit timeUnit) { - switch (testDriver) { - case MYSQL: - props.setProperty( - "monitoring-" + PropertyKey.socketTimeout.getKeyName(), - String.valueOf(timeUnit.toMillis(timeout))); + public static Connection getDriverConnection(TestEnvironmentInfo info) throws SQLException { + String url; + switch (info.getRequest().getDatabaseEngineDeployment()) { + case AURORA: + case RDS_MULTI_AZ_CLUSTER: + url = String.format( + "%s%s:%d/%s", + DriverHelper.getDriverProtocol(info.getRequest().getDatabaseEngine()), + info.getDatabaseInfo().getClusterEndpoint(), + info.getDatabaseInfo().getClusterEndpointPort(), + info.getDatabaseInfo().getDefaultDbName()); break; - case PG: - props.setProperty( - "monitoring-" + PGProperty.SOCKET_TIMEOUT.getName(), - String.valueOf(timeUnit.toSeconds(timeout))); + case RDS_MULTI_AZ_INSTANCE: + url = String.format( + "%s%s:%d/%s", + DriverHelper.getDriverProtocol(info.getRequest().getDatabaseEngine()), + info.getDatabaseInfo().getInstances().get(0).getHost(), + info.getDatabaseInfo().getInstances().get(0).getPort(), + info.getDatabaseInfo().getDefaultDbName()); break; default: - throw new NotImplementedException(testDriver.toString()); + throw new UnsupportedOperationException(info.getRequest().getDatabaseEngineDeployment().toString()); } + return DriverManager.getConnection(url, info.getDatabaseInfo().getUsername(), info.getDatabaseInfo().getPassword()); } } diff --git a/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java b/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java index 6cf8514a..a80defb9 100644 --- a/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java +++ b/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java @@ -24,9 +24,10 @@ public enum TestEnvironmentFeatures { NETWORK_OUTAGES_ENABLED, AWS_CREDENTIALS_ENABLED, PERFORMANCE, - RUN_AUTOSCALING_TESTS_ONLY, SKIP_MYSQL_DRIVER_TESTS, SKIP_PG_DRIVER_TESTS, + RUN_AUTOSCALING_TESTS_ONLY, TELEMETRY_TRACES_ENABLED, - TELEMETRY_METRICS_ENABLED + TELEMETRY_METRICS_ENABLED, + BLUE_GREEN_DEPLOYMENT } diff --git a/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java b/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java index 81d05483..6d1a1ee1 100644 --- a/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java +++ b/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java @@ -26,7 +26,7 @@ public class TestEnvironmentInfo { private String region; private String rdsEndpoint; - private String clusterName; + private String rdsDbName; private String iamUsername; private TestDatabaseInfo databaseInfo; @@ -36,6 +36,13 @@ public class TestEnvironmentInfo { private TestTelemetryInfo tracesTelemetryInfo; private TestTelemetryInfo metricsTelemetryInfo; + private String blueGreenDeploymentId; + + private String clusterParameterGroupName = null; + + // Random alphanumeric combination that is used to form a test cluster name or an instance name. 
diff --git a/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java b/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java
index 6cf8514a..a80defb9 100644
--- a/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java
+++ b/tests/integration/host/src/test/java/integration/TestEnvironmentFeatures.java
@@ -24,9 +24,10 @@ public enum TestEnvironmentFeatures { NETWORK_OUTAGES_ENABLED, AWS_CREDENTIALS_ENABLED, PERFORMANCE, - RUN_AUTOSCALING_TESTS_ONLY, SKIP_MYSQL_DRIVER_TESTS, SKIP_PG_DRIVER_TESTS, + RUN_AUTOSCALING_TESTS_ONLY, TELEMETRY_TRACES_ENABLED, - TELEMETRY_METRICS_ENABLED + TELEMETRY_METRICS_ENABLED, + BLUE_GREEN_DEPLOYMENT }
diff --git a/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java b/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java
index 81d05483..6d1a1ee1 100644
--- a/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java
+++ b/tests/integration/host/src/test/java/integration/TestEnvironmentInfo.java
@@ -26,7 +26,7 @@ public class TestEnvironmentInfo { private String region; private String rdsEndpoint; - private String clusterName; + private String rdsDbName; private String iamUsername; private TestDatabaseInfo databaseInfo;
@@ -36,6 +36,13 @@ public class TestEnvironmentInfo { private TestTelemetryInfo tracesTelemetryInfo; private TestTelemetryInfo metricsTelemetryInfo; + private String blueGreenDeploymentId; + + private String clusterParameterGroupName = null; + + // Random alphanumeric combination that is used to form a test cluster name or an instance name. + private String randomBase = null; + public TestDatabaseInfo getDatabaseInfo() { return this.databaseInfo; }
@@ -84,8 +91,8 @@ public String getRdsEndpoint() { return this.rdsEndpoint; } - public String getClusterName() { - return this.clusterName; + public String getRdsDbName() { + return this.rdsDbName; } public String getIamUsername() {
@@ -104,8 +111,8 @@ public void setRdsEndpoint(String rdsEndpoint) { this.rdsEndpoint = rdsEndpoint; } - public void setClusterName(String clusterName) { - this.clusterName = clusterName; + public void setRdsDbName(String auroraClusterName) { + this.rdsDbName = auroraClusterName; } public void setDatabaseInfo(TestDatabaseInfo databaseInfo) {
@@ -147,4 +154,28 @@ public void setAwsSessionToken(String awsSessionToken) { public void setIamUsername(String iamUsername) { this.iamUsername = iamUsername; } + + public String getBlueGreenDeploymentId() { + return this.blueGreenDeploymentId; + } + + public void setBlueGreenDeploymentId(final String blueGreenDeploymentId) { + this.blueGreenDeploymentId = blueGreenDeploymentId; + } + + public String getClusterParameterGroupName() { + return this.clusterParameterGroupName; + } + + public void setClusterParameterGroupName(String clusterParameterGroupName) { + this.clusterParameterGroupName = clusterParameterGroupName; + } + + public String getRandomBase() { + return this.randomBase; + } + + public void setRandomBase(String randomBase) { + this.randomBase = randomBase; + } }
diff --git a/tests/integration/host/src/test/java/integration/TestInstanceInfo.java b/tests/integration/host/src/test/java/integration/TestInstanceInfo.java
index 256006e0..250d1932 100644
--- a/tests/integration/host/src/test/java/integration/TestInstanceInfo.java
+++ b/tests/integration/host/src/test/java/integration/TestInstanceInfo.java
@@ -49,10 +49,6 @@ public int getPort() { } public String getUrl() { - String url = host + ":" + port; - if (!url.endsWith("/")) { - url += "/"; - } - return url; + return host + ":" + port + "/"; } }
diff --git a/tests/integration/host/src/test/java/integration/host/TestEnvironment.java b/tests/integration/host/src/test/java/integration/host/TestEnvironment.java
index 7cabd4b2..61c9cf8b 100644
--- a/tests/integration/host/src/test/java/integration/host/TestEnvironment.java
+++ b/tests/integration/host/src/test/java/integration/host/TestEnvironment.java
@@ -16,8 +16,6 @@ package integration.host; -import static org.junit.jupiter.api.Assertions.assertEquals; - import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.rekawek.toxiproxy.ToxiproxyClient;
@@ -34,24 +32,28 @@ import integration.host.TestEnvironmentProvider.EnvPreCreateInfo; import integration.util.AuroraTestUtility; import integration.util.ContainerHelper; +import integration.util.StringUtils; import java.io.IOException; -import java.net.URISyntaxException; import java.net.UnknownHostException; import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; +import java.util.List; +import java.util.Random; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; import org.testcontainers.containers.ToxiproxyContainer;
import org.testcontainers.shaded.org.apache.commons.lang3.NotImplementedException; -import integration.util.StringUtils; +import software.amazon.awssdk.services.rds.model.BlueGreenDeployment; import software.amazon.awssdk.services.rds.model.DBCluster; +import software.amazon.awssdk.services.rds.model.DBInstance; public class TestEnvironment implements AutoCloseable {
@@ -69,7 +71,9 @@ public class TestEnvironment implements AutoCloseable { protected static final int PROXY_PORT = 8666; private static final TestEnvironmentConfiguration config = new TestEnvironmentConfiguration(); - private static final boolean USE_OTLP_CONTAINER_FOR_TRACES = false; + private static final boolean USE_OTLP_CONTAINER_FOR_TRACES = true; + + private static final AtomicInteger ipAddressUsageRefCount = new AtomicInteger(0); private final TestEnvironmentInfo info = new TestEnvironmentInfo(); // only this info is passed to test container
@@ -78,9 +82,9 @@ // test container. private int numOfInstances; - private boolean reuseAuroraDbCluster; - private String auroraClusterName; // "cluster-mysql" - private String auroraClusterDomain; // "XYZ.us-west-2.rds.amazonaws.com" + private boolean reuseDb; + private String rdsDbName; // "cluster-mysql", "instance-name", "rds-multi-az-cluster-name" + private String rdsDbDomain; // "XYZ.us-west-2.rds.amazonaws.com" private String rdsEndpoint; // "https://rds-int.amazon.com" private String awsAccessKeyId;
@@ -103,13 +107,15 @@ private TestEnvironment(TestEnvironmentRequest request) { this.info.setRequest(request); } - public static TestEnvironment build(TestEnvironmentRequest request) throws IOException, URISyntaxException { + public static TestEnvironment build(TestEnvironmentRequest request) throws IOException { + LOGGER.finest("Building test env: " + request.getEnvPreCreateIndex()); preCreateEnvironment(request.getEnvPreCreateIndex()); TestEnvironment env; - switch (request.getDatabaseEngineDeployment()) { + DatabaseEngineDeployment deployment = request.getDatabaseEngineDeployment(); + switch (deployment) { case DOCKER: env = new TestEnvironment(request); initDatabaseParams(env);
@@ -124,11 +130,21 @@ public static TestEnvironment build(TestEnvironmentRequest request) throws IOExc TestEnvironmentFeatures.FAILOVER_SUPPORTED.toString()); } + if (request.getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT)) { + throw new UnsupportedOperationException( + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT.toString()); + } + break; case AURORA: - case RDS_MULTI_AZ: + case RDS_MULTI_AZ_CLUSTER: + case RDS_MULTI_AZ_INSTANCE: + env = createAuroraOrMultiAzEnvironment(request); - authorizeIP(env); + + if (request.getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT)) { + createBlueGreenDeployment(env); + } break;
@@ -156,7 +172,27 @@ public static TestEnvironment build(TestEnvironmentRequest request) throws IOExc return env; } - private static TestEnvironment createAuroraOrMultiAzEnvironment(TestEnvironmentRequest request) throws URISyntaxException { + private static void authorizeRunnerIpAddress(TestEnvironment env) { + DatabaseEngineDeployment deployment = env.info.getRequest().getDatabaseEngineDeployment(); + if (deployment == DatabaseEngineDeployment.AURORA + || deployment == DatabaseEngineDeployment.RDS + || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE + || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER) { + // These environments require creating an external database cluster that should be publicly available. + // Corresponding AWS Security Groups should be configured and the test task runner IP address + // should be whitelisted. + if (env.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.AWS_CREDENTIALS_ENABLED)) { + if (ipAddressUsageRefCount.incrementAndGet() == 1) { + authorizeIP(env); + } else { + LOGGER.finest("IP usage count: " + ipAddressUsageRefCount.get()); + } + } + } + }
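Since several environments may be built concurrently, authorizeRunnerIpAddress above (and deAuthorizeIP further below) share a single whitelisting of the runner IP via reference counting. A minimal standalone sketch of the pattern; the class and method names here are hypothetical:

    import java.util.concurrent.atomic.AtomicInteger;

    final class IpAllowlistRefCount {
        private static final AtomicInteger usageCount = new AtomicInteger(0);

        // The first acquirer performs the actual authorization; later acquirers only bump the count.
        static void acquire(Runnable authorize) {
            if (usageCount.incrementAndGet() == 1) {
                authorize.run();
            }
        }

        // The last releaser revokes the authorization again.
        static void release(Runnable deauthorize) {
            if (usageCount.decrementAndGet() == 0) {
                deauthorize.run();
            }
        }
    }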
+ + private static TestEnvironment createAuroraOrMultiAzEnvironment(TestEnvironmentRequest request) { EnvPreCreateInfo preCreateInfo = TestEnvironmentProvider.preCreateInfos.get(request.getEnvPreCreateIndex());
@@ -189,8 +225,16 @@ private static TestEnvironment createAuroraOrMultiAzEnvironment(TestEnvironmentR } if (result instanceof TestEnvironment) { TestEnvironment resultTestEnvironment = (TestEnvironment) result; - LOGGER.finer(() -> String.format("Use pre-created DB cluster: %s.cluster-%s", - resultTestEnvironment.auroraClusterName, resultTestEnvironment.auroraClusterDomain)); + final DatabaseEngineDeployment deployment = + resultTestEnvironment.info.getRequest().getDatabaseEngineDeployment(); + if (deployment == DatabaseEngineDeployment.AURORA + || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER) { + LOGGER.finer(() -> String.format("Use pre-created DB cluster: %s.cluster-%s", + resultTestEnvironment.rdsDbName, resultTestEnvironment.rdsDbDomain)); + } else { + LOGGER.finer(() -> String.format("Use pre-created DB : %s.%s", + resultTestEnvironment.rdsDbName, resultTestEnvironment.rdsDbDomain)); + } return resultTestEnvironment; }
@@ -199,19 +243,148 @@ } else { TestEnvironment env = new TestEnvironment(request); + initRandomBase(env); initDatabaseParams(env); - createDbCluster(env); + initAwsCredentials(env); + + switch (request.getDatabaseEngineDeployment()) { + case RDS_MULTI_AZ_INSTANCE: + initEnv(env); + authorizeRunnerIpAddress(env); + createMultiAzInstance(env); + configureIamAccess(env); + break; + case RDS_MULTI_AZ_CLUSTER: + initEnv(env); + authorizeRunnerIpAddress(env); + createDbCluster(env); + configureIamAccess(env); + break; + case AURORA: + initEnv(env); + authorizeRunnerIpAddress(env); + + if (!env.reuseDb + && env.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT)) { + createCustomClusterParameterGroup(env); + } + createDbCluster(env); + configureIamAccess(env); + break; + default: + throw new NotImplementedException(request.getDatabaseEngineDeployment().toString()); + } + + return env; + } + + } + + private static void createBlueGreenDeployment(TestEnvironment env) { - if (request.getFeatures().contains(TestEnvironmentFeatures.IAM)) { - if (request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ) { - throw new RuntimeException("IAM isn't supported by " + DatabaseEngineDeployment.RDS_MULTI_AZ); + if (env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.AURORA) { + DBCluster clusterInfo = env.auroraUtil.getClusterInfo(env.rdsDbName); + if (env.reuseDb) { + BlueGreenDeployment bgDeployment = env.auroraUtil.getBlueGreenDeploymentBySource(clusterInfo.dbClusterArn()); + if (bgDeployment != null) { + env.info.setBlueGreenDeploymentId(bgDeployment.blueGreenDeploymentIdentifier()); + waitForBlueGreenClustersHaveRightState(env, bgDeployment); + return; } - configureIamAccess(env); } - return env; + // otherwise, create a new BG deployment + final String blueGreenId = env.auroraUtil.createBlueGreenDeployment( + env.rdsDbName, clusterInfo.dbClusterArn()); + env.info.setBlueGreenDeploymentId(blueGreenId); + + BlueGreenDeployment bgDeployment = env.auroraUtil.getBlueGreenDeployment(blueGreenId); + if (bgDeployment != null) { + waitForBlueGreenClustersHaveRightState(env, bgDeployment); + } + + } else if (env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE) { + DBInstance instanceInfo = env.auroraUtil.getRdsInstanceInfo(env.rdsDbName); + if (env.reuseDb) { + BlueGreenDeployment bgDeployment = env.auroraUtil.getBlueGreenDeploymentBySource(instanceInfo.dbInstanceArn()); + if (bgDeployment != null) { + env.info.setBlueGreenDeploymentId(bgDeployment.blueGreenDeploymentIdentifier()); + waitForBlueGreenInstancesHaveRightState(env, bgDeployment); + return; + } + } + + // otherwise, create a new BG deployment + final String blueGreenId = env.auroraUtil.createBlueGreenDeployment( + env.rdsDbName, instanceInfo.dbInstanceArn()); + env.info.setBlueGreenDeploymentId(blueGreenId); + + BlueGreenDeployment bgDeployment = env.auroraUtil.getBlueGreenDeployment(blueGreenId); + if (bgDeployment != null) { + waitForBlueGreenInstancesHaveRightState(env, bgDeployment); + } + + } else { + LOGGER.warning("BG Deployments are supported for RDS MultiAz Instances and Aurora clusters only." + " Proceeding without creating a BG Deployment."); + } + } + + private static void waitForBlueGreenClustersHaveRightState(TestEnvironment env, BlueGreenDeployment bgDeployment) { + + DBCluster blueClusterInfo = env.auroraUtil.getClusterByArn(bgDeployment.source()); + if (blueClusterInfo != null) { + try { + env.auroraUtil.waitUntilClusterHasRightState(blueClusterInfo.dbClusterIdentifier()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } } + DBCluster greenClusterInfo = env.auroraUtil.getClusterByArn(bgDeployment.target()); + if (greenClusterInfo != null) { + try { + env.auroraUtil.waitUntilClusterHasRightState(greenClusterInfo.dbClusterIdentifier()); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } + } + } + + private static void waitForBlueGreenInstancesHaveRightState(TestEnvironment env, BlueGreenDeployment bgDeployment) { + + DBInstance blueInstanceInfo = env.auroraUtil.getRdsInstanceInfoByArn(bgDeployment.source()); + if (blueInstanceInfo != null) { + try { + env.auroraUtil.waitUntilInstanceHasRightState( + blueInstanceInfo.dbInstanceIdentifier(), "available"); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } + } + + DBInstance greenInstanceInfo = env.auroraUtil.getRdsInstanceInfoByArn(bgDeployment.target()); + if (greenInstanceInfo != null) { + try { + env.auroraUtil.waitUntilInstanceHasRightState( + greenInstanceInfo.dbInstanceIdentifier(), "available"); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } + } + } + + private static void createCustomClusterParameterGroup(TestEnvironment env) { + String groupName = String.format("test-cpg-%s", env.info.getRandomBase()); + String engine = getDbEngine(env.info.getRequest()); + String engineVersion = getDbEngineVersion(engine, env); + env.auroraUtil.createCustomClusterParameterGroup( + groupName, engine, engineVersion, env.info.getRequest().getDatabaseEngine()); + env.info.setClusterParameterGroupName(groupName); }
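Both creation paths above hand back a deployment id and then wait for the blue and green sides to settle. A hedged sketch of polling a Blue/Green deployment until it reports AVAILABLE with the AWS SDK v2; the timeout and poll interval are assumptions, not values from this patch:

    import java.time.Duration;
    import java.time.Instant;
    import software.amazon.awssdk.services.rds.RdsClient;
    import software.amazon.awssdk.services.rds.model.BlueGreenDeployment;
    import software.amazon.awssdk.services.rds.model.DescribeBlueGreenDeploymentsRequest;

    final class BlueGreenWaiter {
        // Polls until the deployment reports AVAILABLE or the assumed timeout elapses.
        static BlueGreenDeployment waitUntilAvailable(RdsClient rds, String bgId) throws InterruptedException {
            Instant deadline = Instant.now().plus(Duration.ofMinutes(30)); // timeout is an assumption
            while (Instant.now().isBefore(deadline)) {
                BlueGreenDeployment bg = rds.describeBlueGreenDeployments(
                        DescribeBlueGreenDeploymentsRequest.builder()
                            .blueGreenDeploymentIdentifier(bgId)
                            .build())
                    .blueGreenDeployments().get(0);
                if ("AVAILABLE".equals(bg.status())) {
                    return bg;
                }
                Thread.sleep(Duration.ofSeconds(30).toMillis()); // poll interval is an assumption
            }
            throw new RuntimeException("Blue/Green deployment " + bgId + " never became available.");
        }
    }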
private static void createDatabaseContainers(TestEnvironment env) {
@@ -284,7 +457,7 @@ private static void createDatabaseContainers(TestEnvironment env) { } } - private static void createDbCluster(TestEnvironment env) throws URISyntaxException { + private static void createDbCluster(TestEnvironment env) { switch (env.info.getRequest().getDatabaseInstances()) { case SINGLE_INSTANCE:
@@ -296,11 +469,21 @@ private static void createDbCluster(TestEnvironment env) throws URISyntaxExcepti initAwsCredentials(env); env.numOfInstances = env.info.getRequest().getNumOfInstances(); - if (env.numOfInstances < 1 || env.numOfInstances > 15) { - LOGGER.warning( - env.numOfInstances + " instances were requested but the requested number must be " - + "between 1 and 15. 5 instances will be used as a default."); - env.numOfInstances = 5; + if (env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.AURORA) { + if (env.numOfInstances < 1 || env.numOfInstances > 15) { + LOGGER.warning( + env.numOfInstances + " instances were requested but the requested number must be " + + "between 1 and 15. 5 instances will be used as a default."); + env.numOfInstances = 5; + } + } + if (env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER) { + if (env.numOfInstances != 3) { + LOGGER.warning( + env.numOfInstances + " instances were requested but the requested number must be 3. " + + "3 instances will be used as a default."); + env.numOfInstances = 3; + } } createDbCluster(env, env.numOfInstances);
@@ -310,19 +493,174 @@ } } - private static void createDbCluster(TestEnvironment env, int numOfInstances) throws URISyntaxException { + private static void createDbCluster(TestEnvironment env, int numOfInstances) { + + if (env.reuseDb) { + if (StringUtils.isNullOrEmpty(env.rdsDbDomain)) { + throw new RuntimeException("Environment variable RDS_DB_DOMAIN is required."); + } + if (StringUtils.isNullOrEmpty(env.rdsDbName)) { + throw new RuntimeException("Environment variable RDS_DB_NAME is required."); + } + + if (!env.auroraUtil.doesClusterExist(env.rdsDbName)) { + throw new RuntimeException( + "It's requested to reuse an existing DB cluster but it doesn't exist: " + + env.rdsDbName + + ".cluster-" + + env.rdsDbDomain); + } + LOGGER.finer( + "Reuse existing cluster " + env.rdsDbName + ".cluster-" + env.rdsDbDomain); + + DBCluster clusterInfo = env.auroraUtil.getClusterInfo(env.rdsDbName); + + DatabaseEngine existingClusterDatabaseEngine = env.auroraUtil.getClusterEngine(clusterInfo); + if (existingClusterDatabaseEngine != env.info.getRequest().getDatabaseEngine()) { + throw new RuntimeException( + "Existing cluster is " + + existingClusterDatabaseEngine + + " cluster. " + + env.info.getRequest().getDatabaseEngine() + + " is expected."); + } + + env.info.setDatabaseEngine(clusterInfo.engine()); + env.info.setDatabaseEngineVersion(clusterInfo.engineVersion()); + } else { + if (StringUtils.isNullOrEmpty(env.rdsDbName)) { + int remainingTries = 5; + boolean clusterExists = false; + while (remainingTries-- > 0) { + env.rdsDbName = getRandomName(env); + if (env.auroraUtil.doesClusterExist(env.rdsDbName)) { + clusterExists = true; + env.info.setRandomBase(null); + initRandomBase(env); + LOGGER.finest("Cluster " + env.rdsDbName + " already exists. Picking another name."); + } else { + clusterExists = false; + LOGGER.finer("Cluster to create: " + env.rdsDbName); + break; + } + } + if (clusterExists) { + throw new RuntimeException("Can't pick up a cluster name."); + } + } + + try { + String engine = getDbEngine(env.info.getRequest()); + String engineVersion = getDbEngineVersion(engine, env); + if (StringUtils.isNullOrEmpty(engineVersion)) { + throw new RuntimeException("Failed to get engine version."); + } + String instanceClass = env.auroraUtil.getDbInstanceClass(env.info.getRequest()); + + LOGGER.finer("Using " + engine + " " + engineVersion); + + env.auroraUtil.createCluster( + env.info.getDatabaseInfo().getUsername(), + env.info.getDatabaseInfo().getPassword(), + env.info.getDatabaseInfo().getDefaultDbName(), + env.rdsDbName, + env.info.getRequest().getDatabaseEngineDeployment(), + env.info.getRegion(), + engine, + instanceClass, + engineVersion, + env.info.getClusterParameterGroupName(), + numOfInstances); + + List<DBInstance> dbInstances = env.auroraUtil.getDBInstances(env.rdsDbName); + if (dbInstances.isEmpty()) { + throw new RuntimeException("Failed to get instance information for cluster " + env.rdsDbName); + } + + final String instanceEndpoint = dbInstances.get(0).endpoint().address(); + env.rdsDbDomain = instanceEndpoint.substring(instanceEndpoint.indexOf(".") + 1); + env.info.setDatabaseEngine(engine); + env.info.setDatabaseEngineVersion(engineVersion); + LOGGER.finer( + "Created a new cluster " + env.rdsDbName + ".cluster-" + env.rdsDbDomain); + } catch (Exception e) { + + LOGGER.finer("Error creating a cluster " + env.rdsDbName + ". " + e.getMessage()); + + // remove cluster and instances + LOGGER.finer("Deleting cluster " + env.rdsDbName); + env.auroraUtil.deleteCluster(env.rdsDbName, env.info.getRequest().getDatabaseEngineDeployment(), false); + LOGGER.finer("Deleted cluster " + env.rdsDbName); + + throw new RuntimeException(e); + } + } + + env.info.setRdsDbName(env.rdsDbName); + + int port = getPort(env.info.getRequest()); + + env.info + .getDatabaseInfo() + .setClusterEndpoint(env.rdsDbName + ".cluster-" + env.rdsDbDomain, port); + env.info + .getDatabaseInfo() + .setClusterReadOnlyEndpoint( + env.rdsDbName + ".cluster-ro-" + env.rdsDbDomain, port); + env.info.getDatabaseInfo().setInstanceEndpointSuffix(env.rdsDbDomain, port); + + List<TestInstanceInfo> instances = env.auroraUtil.getTestInstancesInfo(env.rdsDbName); + env.info.getDatabaseInfo().getInstances().clear(); + env.info.getDatabaseInfo().getInstances().addAll(instances); + + // Make sure the cluster is available and accessible. + try { + env.auroraUtil.waitUntilClusterHasRightState(env.rdsDbName); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } + + // Create an 'rds_tools' extension for RDS PG + final DatabaseEngineDeployment deployment = env.info.getRequest().getDatabaseEngineDeployment(); + final DatabaseEngine engine = env.info.getRequest().getDatabaseEngine(); + if ((DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER.equals(deployment) + || DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE.equals(deployment)) + && DatabaseEngine.PG.equals(engine)) { + DriverHelper.registerDriver(engine); + + try (Connection conn = DriverHelper.getDriverConnection(env.info); + Statement stmt = conn.createStatement()) { + stmt.execute("CREATE EXTENSION IF NOT EXISTS rds_tools"); + } catch (SQLException e) { + throw new RuntimeException("An exception occurred while creating the rds_tools extension.", e); + } + } + }
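The retry loop in createDbCluster above regenerates the random base whenever a generated cluster name collides with an existing one. A compact standalone sketch of that pattern; the class, names, and retry count here mirror the loop above but are otherwise hypothetical:

    import java.util.Random;
    import java.util.function.Predicate;

    final class TestNames {
        private static final String ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz";
        private static final Random RANDOM = new Random();

        static String randomBase(int length) {
            StringBuilder sb = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                sb.append(ALPHABET.charAt(RANDOM.nextInt(ALPHABET.length())));
            }
            return sb.toString();
        }

        // Retry a few times with a fresh random base before giving up, mirroring the loop above.
        static String uniqueName(String prefix, Predicate<String> exists) {
            for (int tries = 5; tries > 0; tries--) {
                String candidate = prefix + randomBase(10);
                if (!exists.test(candidate)) {
                    return candidate;
                }
            }
            throw new IllegalStateException("Couldn't find an unused name.");
        }
    }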
private static void initEnv(TestEnvironment env) { env.info.setRegion( !StringUtils.isNullOrEmpty(config.rdsDbRegion) ? config.rdsDbRegion : "us-east-2"); - env.reuseAuroraDbCluster = config.reuseRdsCluster; - env.auroraClusterName = config.rdsClusterName; // "cluster-mysql" - env.auroraClusterDomain = config.rdsClusterDomain; // "XYZ.us-west-2.rds.amazonaws.com" - env.rdsEndpoint = config.rdsEndpoint; // "https://rds-int.amazon.com" + env.reuseDb = config.reuseRdsDb; + env.rdsDbName = config.rdsDbName; // "cluster-mysql" + env.rdsDbDomain = config.rdsDbDomain; // "XYZ.us-west-2.rds.amazonaws.com" + env.rdsEndpoint = config.rdsEndpoint; // "https://rds-int.amazon.com" env.info.setRdsEndpoint(env.rdsEndpoint); + env.auroraUtil = + new AuroraTestUtility( + env.info.getRegion(), + env.rdsEndpoint, + env.awsAccessKeyId, + env.awsSecretAccessKey, + env.awsSessionToken); + } + + private static void createMultiAzInstance(TestEnvironment env) { + env.auroraUtil = new AuroraTestUtility( env.info.getRegion(),
@@ -333,101 +671,99 @@ ArrayList<TestInstanceInfo> instances = new ArrayList<>(); - if (env.reuseAuroraDbCluster) { - if (StringUtils.isNullOrEmpty(env.auroraClusterDomain)) { - throw new RuntimeException("Environment variable AURORA_CLUSTER_DOMAIN is required."); + if (env.reuseDb) { + if (StringUtils.isNullOrEmpty(env.rdsDbDomain)) { + throw new RuntimeException("Environment variable RDS_DB_DOMAIN is required."); + } + if (StringUtils.isNullOrEmpty(env.rdsDbName)) { + throw new RuntimeException("Environment variable RDS_DB_NAME is required."); } - if (!env.auroraUtil.doesClusterExist(env.auroraClusterName)) { + if (!env.auroraUtil.doesInstanceExist(env.rdsDbName)) { throw new RuntimeException( - "It's requested to reuse existing DB cluster but it doesn't exist: " - + env.auroraClusterName + "It's requested to reuse an existing RDS instance but it doesn't exist: " + + env.rdsDbName + "." - + env.auroraClusterDomain); + + env.rdsDbDomain); } LOGGER.finer( - "Reuse existing cluster " + env.auroraClusterName + ".cluster-" + env.auroraClusterDomain); + "Reuse existing RDS Instance " + env.rdsDbName + "." + env.rdsDbDomain); - DBCluster clusterInfo = env.auroraUtil.getClusterInfo(env.auroraClusterName); + DBInstance instanceInfo = env.auroraUtil.getRdsInstanceInfo(env.rdsDbName); - DatabaseEngine existingClusterDatabaseEngine = env.auroraUtil.getClusterEngine(clusterInfo); - if (existingClusterDatabaseEngine != env.info.getRequest().getDatabaseEngine()) { + DatabaseEngine existingRdsInstanceDatabaseEngine = env.auroraUtil.getRdsInstanceEngine(instanceInfo); + if (existingRdsInstanceDatabaseEngine != env.info.getRequest().getDatabaseEngine()) { throw new RuntimeException( - "Existing cluster is " - + existingClusterDatabaseEngine - + " cluster. " + "Existing RDS Instance is " + + existingRdsInstanceDatabaseEngine + + " instance. " + env.info.getRequest().getDatabaseEngine() + " is expected."); } - env.info.setDatabaseEngine(clusterInfo.engine()); - env.info.setDatabaseEngineVersion(clusterInfo.engineVersion()); - instances.addAll(env.auroraUtil.getClusterInstanceIds(env.auroraClusterName)); + env.info.setDatabaseEngine(instanceInfo.engine()); + env.info.setDatabaseEngineVersion(instanceInfo.engineVersion()); + instances.add(new TestInstanceInfo( + instanceInfo.dbInstanceIdentifier(), + instanceInfo.endpoint().address(), + instanceInfo.endpoint().port())); } else { - if (StringUtils.isNullOrEmpty(env.auroraClusterName)) { - env.auroraClusterName = getRandomName(env.info.getRequest()); - LOGGER.finer("Cluster to create: " + env.auroraClusterName); + if (StringUtils.isNullOrEmpty(env.rdsDbName)) { + env.rdsDbName = getRandomName(env); + LOGGER.finer("RDS Instance to create: " + env.rdsDbName); } try { String engine = getDbEngine(env.info.getRequest()); - String engineVersion = getDbEngineVersion(env); + String engineVersion = getDbEngineVersion(engine, env); if (StringUtils.isNullOrEmpty(engineVersion)) { throw new RuntimeException("Failed to get engine version."); } - String instanceClass = getDbInstanceClass(env.info.getRequest()); + String instanceClass = env.auroraUtil.getDbInstanceClass(env.info.getRequest()); - env.auroraClusterDomain = - env.auroraUtil.createCluster( + LOGGER.finer("Using " + engine + " " + engineVersion); + + env.rdsDbDomain = + env.auroraUtil.createMultiAzInstance( env.info.getDatabaseInfo().getUsername(), env.info.getDatabaseInfo().getPassword(), env.info.getDatabaseInfo().getDefaultDbName(), - env.auroraClusterName, + env.rdsDbName, env.info.getRequest().getDatabaseEngineDeployment(), engine, instanceClass, engineVersion, - numOfInstances, instances); + env.info.setDatabaseEngine(engine); env.info.setDatabaseEngineVersion(engineVersion); LOGGER.finer( - "Created a new cluster " + env.auroraClusterName + ".cluster-" + env.auroraClusterDomain); + "Created a new RDS Instance " + env.rdsDbName + "." + env.rdsDbDomain); } catch (Exception e) { - LOGGER.finer("Error creating a cluster " + env.auroraClusterName + ". " + e.getMessage()); + LOGGER.finer("Error creating an RDS Instance " + env.rdsDbName + ". " + e); - // remove cluster and instances - LOGGER.finer("Deleting cluster " + env.auroraClusterName); - env.auroraUtil.deleteCluster(env.auroraClusterName); - LOGGER.finer("Deleted cluster " + env.auroraClusterName); + // remove RDS instance + LOGGER.finer("Deleting RDS Instance " + env.rdsDbName); + env.auroraUtil.deleteMultiAzInstance(env.rdsDbName, false); + LOGGER.finer("Deleted RDS Instance " + env.rdsDbName); throw new RuntimeException(e); } } - env.info.setClusterName(env.auroraClusterName); - int port = getPort(env.info.getRequest()); - - env.info - .getDatabaseInfo() - .setClusterEndpoint(env.auroraClusterName + ".cluster-" + env.auroraClusterDomain, port); - env.info - .getDatabaseInfo() - .setClusterReadOnlyEndpoint( - env.auroraClusterName + ".cluster-ro-" + env.auroraClusterDomain, port); + env.info.getDatabaseInfo().setInstanceEndpointSuffix(env.rdsDbDomain, port); env.info.getDatabaseInfo().getInstances().clear(); env.info.getDatabaseInfo().getInstances().addAll(instances); - authorizeIP(env); + final DatabaseEngineDeployment deployment = env.info.getRequest().getDatabaseEngineDeployment(); + final DatabaseEngine engine = env.info.getRequest().getDatabaseEngine(); - DatabaseEngineDeployment deployment = env.info.getRequest().getDatabaseEngineDeployment(); - DatabaseEngine engine = env.info.getRequest().getDatabaseEngine(); - if (DatabaseEngineDeployment.RDS_MULTI_AZ.equals(deployment) && DatabaseEngine.PG.equals(engine)) { + // Create 'rds_tools' extension for RDS Instance. + if (DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE.equals(deployment) && DatabaseEngine.PG.equals(engine)) { DriverHelper.registerDriver(engine); try (Connection conn = DriverHelper.getDriverConnection(env.info);
@@ -442,29 +778,72 @@ private static void authorizeIP(TestEnvironment env) { try { env.runnerIP = env.auroraUtil.getPublicIPAddress(); + LOGGER.finest("Test runner IP: " + env.runnerIP); } catch (UnknownHostException e) { throw new RuntimeException(e); } env.auroraUtil.ec2AuthorizeIP(env.runnerIP); + LOGGER.finest(String.format("Test runner IP %s authorized. Usage count: %d", + env.runnerIP, ipAddressUsageRefCount.get())); } - private static String getRandomName(TestEnvironmentRequest request) { - switch (request.getDatabaseEngine()) { + private static void deAuthorizeIP(TestEnvironment env) { + if (ipAddressUsageRefCount.decrementAndGet() == 0) { + if (env.runnerIP == null) { + try { + env.runnerIP = env.auroraUtil.getPublicIPAddress(); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + } + env.auroraUtil.ec2DeauthorizesIP(env.runnerIP); + LOGGER.finest(String.format("Test runner IP %s de-authorized. Usage count: %d", + env.runnerIP, ipAddressUsageRefCount.get())); + } else { + LOGGER.finest("IP usage count: " + ipAddressUsageRefCount.get()); + } + } + + private static void initRandomBase(TestEnvironment env) { + String randomBase = env.info.getRandomBase(); + if (StringUtils.isNullOrEmpty(randomBase)) { + env.info.setRandomBase(generateRandom(10)); + } + } + + private static String getRandomName(TestEnvironment env) { + + switch (env.info.getRequest().getDatabaseEngine()) { case MYSQL: - return "test-mysql-" + System.nanoTime(); + return "test-mysql-" + env.info.getRandomBase(); case PG: - return "test-pg-" + System.nanoTime(); + return "test-pg-" + env.info.getRandomBase(); default: - return String.valueOf(System.nanoTime()); + return env.info.getRandomBase(); } } + private static String generateRandom(int length) { + String alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"; + + int n = alphabet.length(); + StringBuilder result = new StringBuilder(); + Random r = new Random(); + + for (int i = 0; i < length; i++) { + result.append(alphabet.charAt(r.nextInt(n))); + } + + return result.toString(); + } + private static String getDbEngine(TestEnvironmentRequest request) { switch (request.getDatabaseEngineDeployment()) { case AURORA: return getAuroraDbEngine(request); case RDS: - case RDS_MULTI_AZ: + case RDS_MULTI_AZ_CLUSTER: + case RDS_MULTI_AZ_INSTANCE: return getRdsEngine(request); default: throw new NotImplementedException(request.getDatabaseEngineDeployment().toString());
@@ -493,49 +872,33 @@ private static String getRdsEngine(TestEnvironmentRequest request) { } } - private static String getDbEngineVersion(TestEnvironment env) { - final TestEnvironmentRequest request = env.info.getRequest(); - switch (request.getDatabaseEngineDeployment()) { - case AURORA: - return getAuroraDbEngineVersion(env); - case RDS: - case RDS_MULTI_AZ: - return getRdsEngineVersion(request); - default: - throw new NotImplementedException(request.getDatabaseEngineDeployment().toString()); - } - } - - private static String getAuroraDbEngineVersion(TestEnvironment env) { - String engineName; + private static String getDbEngineVersion(String engineName, TestEnvironment env) { String systemPropertyVersion; TestEnvironmentRequest request = env.info.getRequest(); switch (request.getDatabaseEngine()) { case MYSQL: - engineName = "aurora-mysql"; - systemPropertyVersion = config.auroraMySqlDbEngineVersion; + systemPropertyVersion = config.mysqlVersion; break; case PG: - engineName = "aurora-postgresql"; - systemPropertyVersion = config.auroraPgDbEngineVersion; + systemPropertyVersion = config.pgVersion; break; default: throw new NotImplementedException(request.getDatabaseEngine().toString()); } - return findAuroraDbEngineVersion(env, engineName, systemPropertyVersion); + return findEngineVersion(env, engineName, systemPropertyVersion); } - private static String findAuroraDbEngineVersion( - TestEnvironment env, - String engineName, - String systemPropertyVersion) { + private static String findEngineVersion( + TestEnvironment env, + String engineName, + String systemPropertyVersion) { if (StringUtils.isNullOrEmpty(systemPropertyVersion)) { - return env.auroraUtil.getLTSVersion(engineName); + return env.auroraUtil.getDefaultVersion(engineName); } switch (systemPropertyVersion.toLowerCase()) { - case "lts": - return env.auroraUtil.getLTSVersion(engineName); + case "default": + return env.auroraUtil.getDefaultVersion(engineName); case "latest": return env.auroraUtil.getLatestVersion(engineName); default:
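The renamed findEngineVersion above resolves the version spec taken from MYSQL_VERSION/PG_VERSION. A condensed restatement of that contract; VersionSource stands in for AuroraTestUtility here, and the passthrough in the default branch is an assumption, since the hunk is cut off at that point:

    // Sketch: how an empty, "default", "latest", or literal version spec resolves.
    interface VersionSource {
        String getDefaultVersion(String engineName);
        String getLatestVersion(String engineName);
    }

    static String resolveVersion(String spec, String engineName, VersionSource source) {
        if (spec == null || spec.isEmpty()) {
            return source.getDefaultVersion(engineName); // no MYSQL_VERSION / PG_VERSION set
        }
        switch (spec.toLowerCase()) {
            case "default":
                return source.getDefaultVersion(engineName);
            case "latest":
                return source.getLatestVersion(engineName);
            default:
                return spec; // an explicit version such as "15.4" (assumed passthrough)
        }
    }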
@@ -543,29 +906,6 @@ private static String findAuroraDbEngineVersion( } } - private static String getRdsEngineVersion(TestEnvironmentRequest request) { - switch (request.getDatabaseEngine()) { - case MYSQL: - return "8.0.33"; - case PG: - return "15.4"; - default: - throw new NotImplementedException(request.getDatabaseEngine().toString()); - } - } - - private static String getDbInstanceClass(TestEnvironmentRequest request) { - switch (request.getDatabaseEngineDeployment()) { - case AURORA: - return "db.r6g.large"; - case RDS: - case RDS_MULTI_AZ: - return "db.m5d.large"; - default: - throw new NotImplementedException(request.getDatabaseEngine().toString()); - } - } - private static int getPort(TestEnvironmentRequest request) { switch (request.getDatabaseEngine()) { case MYSQL:
@@ -579,9 +919,10 @@ private static int getPort(TestEnvironmentRequest request) { private static void initDatabaseParams(TestEnvironment env) { final String dbName = - !StringUtils.isNullOrEmpty(config.dbName) - ? config.dbName - : "test_database"; + config.dbName == null + ? "test_database" + : config.dbName.trim(); + final String dbUsername = !StringUtils.isNullOrEmpty(config.dbUsername) ? config.dbUsername
@@ -805,17 +1146,18 @@ private static String getContainerBaseImageName(TestEnvironmentRequest request) private static void configureIamAccess(TestEnvironment env) { - if (env.info.getRequest().getDatabaseEngineDeployment() != DatabaseEngineDeployment.AURORA) { - throw new UnsupportedOperationException( - env.info.getRequest().getDatabaseEngineDeployment().toString()); + if (!env.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.IAM)) { + return; } + final DatabaseEngineDeployment deployment = env.info.getRequest().getDatabaseEngineDeployment(); + env.info.setIamUsername( !StringUtils.isNullOrEmpty(config.iamUser) ? config.iamUser : "jane_doe"); - if (!env.reuseAuroraDbCluster) { + if (!env.reuseDb) { try { Class.forName(DriverHelper.getDriverClassname(env.info.getRequest().getDatabaseEngine())); } catch (ClassNotFoundException e) {
@@ -825,22 +1167,42 @@ private static void configureIamAccess(TestEnvironment env) { e); } - final String url = - String.format( + String url; + switch (deployment) { + case AURORA: + case RDS_MULTI_AZ_CLUSTER: + url = String.format( "%s%s:%d/%s", DriverHelper.getDriverProtocol(env.info.getRequest().getDatabaseEngine()), env.info.getDatabaseInfo().getClusterEndpoint(), env.info.getDatabaseInfo().getClusterEndpointPort(), env.info.getDatabaseInfo().getDefaultDbName()); + break; + case RDS_MULTI_AZ_INSTANCE: + url = String.format( + "%s%s:%d/%s", + DriverHelper.getDriverProtocol(env.info.getRequest().getDatabaseEngine()), + env.info.getDatabaseInfo().getInstances().get(0).getHost(), + env.info.getDatabaseInfo().getInstances().get(0).getPort(), + env.info.getDatabaseInfo().getDefaultDbName()); + break; + default: + throw new UnsupportedOperationException(deployment.toString()); + } try { + final boolean useRdsTools = env.info.getRequest().getFeatures() + .contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT) + && env.info.getRequest().getDatabaseEngine() == DatabaseEngine.PG + && env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE; env.auroraUtil.addAuroraAwsIamUser( env.info.getRequest().getDatabaseEngine(), url, env.info.getDatabaseInfo().getUsername(), env.info.getDatabaseInfo().getPassword(), env.info.getIamUsername(), - env.info.getDatabaseInfo().getDefaultDbName()); + env.info.getDatabaseInfo().getDefaultDbName(), + useRdsTools); } catch (SQLException e) { throw new RuntimeException("Error configuring IAM access.", e);
@@ -882,21 +1244,14 @@ public void debugTests(String taskName) throws IOException, InterruptedException @Override public void close() throws Exception { - if (this.databaseContainers != null) { - for (GenericContainer container : this.databaseContainers) { - try { - container.stop(); - } catch (Exception ex) { - // ignore - } + for (GenericContainer container : this.databaseContainers) { + try { + container.stop(); + } catch (Exception ex) { + // ignore } - this.databaseContainers.clear(); - } - - if (this.testContainer != null) { - this.testContainer.stop(); - this.testContainer = null; } + this.databaseContainers.clear(); if (this.telemetryXRayContainer != null) { this.telemetryXRayContainer.stop();
@@ -908,6 +1263,11 @@ public void close() throws Exception { this.telemetryOtlpContainer = null; } + if (this.testContainer != null) { + this.testContainer.stop(); + this.testContainer = null; + } + if (this.proxyContainers != null) { for (ToxiproxyContainer proxyContainer : this.proxyContainers) { proxyContainer.stop();
@@ -917,25 +1277,155 @@ public void close() throws Exception { switch (this.info.getRequest().getDatabaseEngineDeployment()) { case AURORA: - case RDS_MULTI_AZ: - deleteDbCluster(); + if (this.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT) + && !StringUtils.isNullOrEmpty(this.info.getBlueGreenDeploymentId())) { + deleteBlueGreenDeployment(); + deleteDbCluster(true); + deleteCustomClusterParameterGroup(this.info.getClusterParameterGroupName()); + } else { + deleteDbCluster(false); + } + deAuthorizeIP(this); + break; + case RDS_MULTI_AZ_CLUSTER: + deleteDbCluster(false); + deAuthorizeIP(this); + break; + case RDS_MULTI_AZ_INSTANCE:
+ if (this.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT) + && !StringUtils.isNullOrEmpty(this.info.getBlueGreenDeploymentId())) { + deleteBlueGreenDeployment(); + } + deleteMultiAzInstance(); + deAuthorizeIP(this); break; case RDS: - throw new NotImplementedException(this.info.getRequest().getDatabaseEngineDeployment().toString()); - default: + // not in use at the moment + break; + case DOCKER: + // no external resources to dispose // do nothing + break; + default: + throw new NotImplementedException(this.info.getRequest().getDatabaseEngineDeployment().toString()); } } + + private void deleteDbCluster(boolean waitForCompletion) { + if (!this.reuseDb) { + LOGGER.finest("Deleting cluster " + this.rdsDbName + ".cluster-" + this.rdsDbDomain); + auroraUtil.deleteCluster( + this.rdsDbName, this.info.getRequest().getDatabaseEngineDeployment(), waitForCompletion); + LOGGER.finest("Deleted cluster " + this.rdsDbName + ".cluster-" + this.rdsDbDomain); } } - private void deleteDbCluster() { - if (!this.reuseAuroraDbCluster && !StringUtils.isNullOrEmpty(this.runnerIP)) { - auroraUtil.ec2DeauthorizesIP(runnerIP); + private void deleteMultiAzInstance() { + if (!this.reuseDb) { + LOGGER.finest("Deleting MultiAz Instance " + this.rdsDbName + "." + this.rdsDbDomain); + auroraUtil.deleteMultiAzInstance(this.rdsDbName, false); + LOGGER.finest("Deleted MultiAz Instance " + this.rdsDbName + "." + this.rdsDbDomain); } + } + + private void deleteBlueGreenDeployment() throws InterruptedException { + + BlueGreenDeployment blueGreenDeployment; + + switch (this.info.getRequest().getDatabaseEngineDeployment()) { + case AURORA: + if (this.reuseDb) { + break; + } - if (!this.reuseAuroraDbCluster) { - LOGGER.finest("Deleting cluster " + this.auroraClusterName + ".cluster-" + this.auroraClusterDomain); - auroraUtil.deleteCluster(this.auroraClusterName); - LOGGER.finest("Deleted cluster " + this.auroraClusterName + ".cluster-" + this.auroraClusterDomain); + blueGreenDeployment = auroraUtil.getBlueGreenDeployment(this.info.getBlueGreenDeploymentId()); + + if (blueGreenDeployment == null) { + return; + } + + auroraUtil.deleteBlueGreenDeployment(this.info.getBlueGreenDeploymentId(), true); + + // Remove extra DB cluster + + // For BGD in AVAILABLE status: source = blue, target = green + // For BGD in SWITCHOVER_COMPLETED: source = old1, target = blue + LOGGER.finest("BG source: " + blueGreenDeployment.source()); + LOGGER.finest("BG target: " + blueGreenDeployment.target()); + + if ("SWITCHOVER_COMPLETED".equals(blueGreenDeployment.status())) { + // Delete old1 cluster + DBCluster old1ClusterInfo = auroraUtil.getClusterByArn(blueGreenDeployment.source()); + if (old1ClusterInfo != null) { + auroraUtil.waitUntilClusterHasRightState(old1ClusterInfo.dbClusterIdentifier(), "available"); + LOGGER.finest("Deleting Aurora cluster " + old1ClusterInfo.dbClusterIdentifier()); + auroraUtil.deleteCluster( + old1ClusterInfo.dbClusterIdentifier(), + this.info.getRequest().getDatabaseEngineDeployment(), + true); + LOGGER.finest("Deleted Aurora cluster " + old1ClusterInfo.dbClusterIdentifier()); + } + } else { + // Delete green cluster + DBCluster greenClusterInfo = auroraUtil.getClusterByArn(blueGreenDeployment.target()); + if (greenClusterInfo != null) { + auroraUtil.promoteClusterToStandalone(blueGreenDeployment.target()); + LOGGER.finest("Deleting Aurora cluster " + greenClusterInfo.dbClusterIdentifier()); + auroraUtil.deleteCluster( + greenClusterInfo.dbClusterIdentifier(), + this.info.getRequest().getDatabaseEngineDeployment(), + true); + LOGGER.finest("Deleted Aurora cluster " + greenClusterInfo.dbClusterIdentifier()); + } + } + break; + case RDS_MULTI_AZ_INSTANCE: + if (this.reuseDb) { + break; + } + + blueGreenDeployment = auroraUtil.getBlueGreenDeployment(this.info.getBlueGreenDeploymentId()); + + if (blueGreenDeployment == null) { + return; + } + + auroraUtil.deleteBlueGreenDeployment(this.info.getBlueGreenDeploymentId(), true); + + // For BGD in AVAILABLE status: source = blue, target = green + // For BGD in SWITCHOVER_COMPLETED: source = old1, target = blue + LOGGER.finest("BG source: " + blueGreenDeployment.source()); + LOGGER.finest("BG target: " + blueGreenDeployment.target()); + + if ("SWITCHOVER_COMPLETED".equals(blueGreenDeployment.status())) { + // Delete old1 instance + DBInstance old1InstanceInfo = auroraUtil.getRdsInstanceInfoByArn(blueGreenDeployment.source()); + if (old1InstanceInfo != null) { + LOGGER.finest("Deleting MultiAz Instance " + old1InstanceInfo.dbInstanceIdentifier()); + auroraUtil.deleteMultiAzInstance(old1InstanceInfo.dbInstanceIdentifier(), true); + LOGGER.finest("Deleted MultiAz Instance " + old1InstanceInfo.dbInstanceIdentifier()); + } + } else { + // Delete green instance + DBInstance greenInstanceInfo = auroraUtil.getRdsInstanceInfoByArn(blueGreenDeployment.target()); + if (greenInstanceInfo != null) { + auroraUtil.promoteInstanceToStandalone(blueGreenDeployment.target()); + LOGGER.finest("Deleting MultiAz Instance " + greenInstanceInfo.dbInstanceIdentifier()); + auroraUtil.deleteMultiAzInstance(greenInstanceInfo.dbInstanceIdentifier(), true); + LOGGER.finest("Deleted MultiAz Instance " + greenInstanceInfo.dbInstanceIdentifier()); + } + } + break; + default: + throw new RuntimeException("Unsupported " + this.info.getRequest().getDatabaseEngineDeployment()); + } + } + + private void deleteCustomClusterParameterGroup(String groupName) { + try { + this.auroraUtil.deleteCustomClusterParameterGroup(groupName); + } catch (Exception ex) { + LOGGER.finest(String.format("Error deleting cluster parameter group %s. %s", groupName, ex)); } }
@@ -951,8 +1441,8 @@ private static void preCreateEnvironment(int currentEnvIndex) { if (preCreateInfo.envPreCreateFuture == null && (preCreateInfo.request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.AURORA - || preCreateInfo.request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS - || preCreateInfo.request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ)) { + || preCreateInfo.request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS + || preCreateInfo.request.getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER)) { // run environment creation in advance int finalIndex = index;
@@ -964,13 +1454,35 @@ preCreateInfo.envPreCreateFuture = envPreCreateExecutor.submit(() -> { final long startTime = System.nanoTime(); try { + initRandomBase(env); initDatabaseParams(env); - createDbCluster(env); - if (env.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.IAM)) { - if (env.info.getRequest().getDatabaseEngineDeployment() == DatabaseEngineDeployment.RDS_MULTI_AZ) { - throw new RuntimeException("IAM isn't supported by " + DatabaseEngineDeployment.RDS_MULTI_AZ); - } - configureIamAccess(env); + initAwsCredentials(env); + + switch (env.info.getRequest().getDatabaseEngineDeployment()) { + case RDS_MULTI_AZ_INSTANCE: + initEnv(env); + authorizeRunnerIpAddress(env); + createMultiAzInstance(env); + configureIamAccess(env); + break; + case RDS_MULTI_AZ_CLUSTER: + initEnv(env); + authorizeRunnerIpAddress(env); + createDbCluster(env); + configureIamAccess(env); + break; + case AURORA: + initEnv(env); + authorizeRunnerIpAddress(env); + + if (env.info.getRequest().getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT)) { + createCustomClusterParameterGroup(env); + } + createDbCluster(env); + configureIamAccess(env); + break; + default: + throw new NotImplementedException(env.info.getRequest().getDatabaseEngineDeployment().toString()); } return env;
diff --git a/tests/integration/host/src/test/java/integration/host/TestEnvironmentConfiguration.java b/tests/integration/host/src/test/java/integration/host/TestEnvironmentConfiguration.java
index 6789df0b..f36f8af8 100644
--- a/tests/integration/host/src/test/java/integration/host/TestEnvironmentConfiguration.java
+++ b/tests/integration/host/src/test/java/integration/host/TestEnvironmentConfiguration.java
@@ -24,8 +24,10 @@ public class TestEnvironmentConfiguration { Boolean.parseBoolean(System.getProperty("exclude-docker", "false")); public boolean excludeAurora = Boolean.parseBoolean(System.getProperty("exclude-aurora", "false")); - public boolean excludeMultiAz = - Boolean.parseBoolean(System.getProperty("exclude-multi-az", "false")); + public boolean excludeMultiAzCluster = + Boolean.parseBoolean(System.getProperty("exclude-multi-az-cluster", "false")); + public boolean excludeMultiAzInstance = + Boolean.parseBoolean(System.getProperty("exclude-multi-az-instance", "false")); public boolean excludePerformance = Boolean.parseBoolean(System.getProperty("exclude-performance", "false")); public boolean excludeMysqlEngine =
@@ -58,27 +60,34 @@ public class TestEnvironmentConfiguration { Boolean.parseBoolean(System.getProperty("exclude-traces-telemetry", "false")); public boolean excludeMetricsTelemetry = Boolean.parseBoolean(System.getProperty("exclude-metrics-telemetry", "false")); + public boolean excludeBlueGreen = + Boolean.parseBoolean(System.getProperty("exclude-bg", "true")); + public boolean testBlueGreenOnly = + Boolean.parseBoolean(System.getProperty("test-bg-only", "false")); public boolean excludePython38 = Boolean.parseBoolean(System.getProperty("exclude-python-38", "false")); public boolean excludePython311 = Boolean.parseBoolean(System.getProperty("exclude-python-311", "false")); - public String testFilter = System.getProperty("FILTER"); + public String testFilter = System.getenv("FILTER"); public String rdsDbRegion = System.getenv("RDS_DB_REGION"); - public boolean reuseRdsCluster = Boolean.parseBoolean(System.getenv("REUSE_RDS_CLUSTER")); - public String rdsClusterName = System.getenv("RDS_CLUSTER_NAME"); // "cluster-mysql" - public String rdsClusterDomain = - System.getenv("RDS_CLUSTER_DOMAIN"); // "XYZ.us-west-2.rds.amazonaws.com" + public boolean reuseRdsDb = Boolean.parseBoolean(System.getenv("REUSE_RDS_DB")); + public String rdsDbName = System.getenv("RDS_DB_NAME"); // "cluster-mysql", "instance-name", "cluster-multi-az-name" + public String rdsDbDomain = + System.getenv("RDS_DB_DOMAIN"); // "XYZ.us-west-2.rds.amazonaws.com" + public String rdsEndpoint = - System.getenv("RDS_ENDPOINT"); // "https://rds-int.amazon.com" + System.getenv("RDS_ENDPOINT"); // "https://rds-int.amazon.com" - // Expected values: "latest", "lts", or engine version, for example, "15.4" - // If left as empty, will use LTS version - public String auroraMySqlDbEngineVersion = System.getenv("AURORA_MYSQL_DB_ENGINE_VERSION"); - public String auroraPgDbEngineVersion = System.getenv("AURORA_PG_ENGINE_VERSION"); + // Expected values: "latest", "default", or engine version, for example, "15.4" + // If left empty, the default version is used. + public String mysqlVersion = + System.getenv("MYSQL_VERSION"); + public String pgVersion = + System.getenv("PG_VERSION"); public String dbName = System.getenv("DB_DATABASE_NAME"); public String dbUsername = System.getenv("DB_USERNAME");
diff --git a/tests/integration/host/src/test/java/integration/host/TestEnvironmentProvider.java b/tests/integration/host/src/test/java/integration/host/TestEnvironmentProvider.java
index d5cd972d..15011003 100644
--- a/tests/integration/host/src/test/java/integration/host/TestEnvironmentProvider.java
+++ b/tests/integration/host/src/test/java/integration/host/TestEnvironmentProvider.java
@@ -65,7 +65,10 @@ public Stream provideTestTemplateInvocationContex // Not in use. continue; } - if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ && config.excludeMultiAz) { + if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER && config.excludeMultiAzCluster) { + continue; + } + if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE && config.excludeMultiAzInstance) { continue; }
@@ -96,13 +99,20 @@ if (numOfInstances == 2 && config.excludeInstances2) { continue; } + if (numOfInstances == 3 && config.excludeInstances3) { + continue; + } if (numOfInstances == 5 && config.excludeInstances5) { continue; } - if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ && numOfInstances != 3) { + if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER && numOfInstances != 3) { // Multi-AZ clusters support only 3 instances continue; } + if (deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE && numOfInstances != 1) { + // Multi-AZ instances support only 1 instance + continue; + } if (deployment == DatabaseEngineDeployment.AURORA && numOfInstances == 3) { // Aurora supports clusters with 3 instances but running such tests is similar // to running tests on 5-instance cluster.
@@ -118,36 +128,55 @@ continue; } - resultContextList.add( - getEnvironment( - new TestEnvironmentRequest( - engine, - instances, - instances == DatabaseInstances.SINGLE_INSTANCE ? 1 : numOfInstances, - deployment, - targetPythonVersion, - TestEnvironmentFeatures.NETWORK_OUTAGES_ENABLED, - engine == DatabaseEngine.PG ? TestEnvironmentFeatures.ABORT_CONNECTION_SUPPORTED : null, - deployment == DatabaseEngineDeployment.DOCKER - && config.excludeTracesTelemetry - && config.excludeMetricsTelemetry - ? null - : TestEnvironmentFeatures.AWS_CREDENTIALS_ENABLED, - deployment == DatabaseEngineDeployment.DOCKER || config.excludeFailover - ? null - : TestEnvironmentFeatures.FAILOVER_SUPPORTED, - deployment == DatabaseEngineDeployment.DOCKER - || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ - || config.excludeIam - ? null - : TestEnvironmentFeatures.IAM, - config.excludeSecretsManager ? null : TestEnvironmentFeatures.SECRETS_MANAGER, - config.excludePerformance ? null : TestEnvironmentFeatures.PERFORMANCE, - config.excludeMysqlDriver ? TestEnvironmentFeatures.SKIP_MYSQL_DRIVER_TESTS : null, - config.excludePgDriver ? TestEnvironmentFeatures.SKIP_PG_DRIVER_TESTS : null, - config.testAutoscalingOnly ? TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY : null, - config.excludeTracesTelemetry ? null : TestEnvironmentFeatures.TELEMETRY_TRACES_ENABLED, - config.excludeMetricsTelemetry ? null : TestEnvironmentFeatures.TELEMETRY_METRICS_ENABLED))); + for (boolean withBlueGreenFeature : Arrays.asList(true, false)) { + if (!withBlueGreenFeature) { + if (config.testBlueGreenOnly) { + continue; + } + } + if (withBlueGreenFeature) { + if (config.excludeBlueGreen && !config.testBlueGreenOnly) { + continue; + } + // Run BlueGreen test only for MultiAz Instances with 1 node or for Aurora + if (deployment != DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE + && deployment != DatabaseEngineDeployment.AURORA) { + continue; + } + } + + resultContextList.add( + getEnvironment( + new TestEnvironmentRequest( + engine, + instances, + instances == DatabaseInstances.SINGLE_INSTANCE ? 1 : numOfInstances, + deployment, + targetPythonVersion, + TestEnvironmentFeatures.NETWORK_OUTAGES_ENABLED, + engine == DatabaseEngine.PG ? TestEnvironmentFeatures.ABORT_CONNECTION_SUPPORTED : null, + deployment == DatabaseEngineDeployment.DOCKER + && config.excludeTracesTelemetry + && config.excludeMetricsTelemetry + ? null + : TestEnvironmentFeatures.AWS_CREDENTIALS_ENABLED, + deployment == DatabaseEngineDeployment.DOCKER || config.excludeFailover + ? null + : TestEnvironmentFeatures.FAILOVER_SUPPORTED, + deployment == DatabaseEngineDeployment.DOCKER + || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER + || config.excludeIam + ? null + : TestEnvironmentFeatures.IAM, + config.excludeSecretsManager ? null : TestEnvironmentFeatures.SECRETS_MANAGER, + config.excludePerformance ? null : TestEnvironmentFeatures.PERFORMANCE, + config.excludeMysqlDriver ? TestEnvironmentFeatures.SKIP_MYSQL_DRIVER_TESTS : null, + config.excludePgDriver ? TestEnvironmentFeatures.SKIP_PG_DRIVER_TESTS : null, + config.testAutoscalingOnly ? TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY : null, + config.excludeTracesTelemetry ? null : TestEnvironmentFeatures.TELEMETRY_TRACES_ENABLED, + config.excludeMetricsTelemetry ? null : TestEnvironmentFeatures.TELEMETRY_METRICS_ENABLED, + withBlueGreenFeature ? TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT : null))); + } } } }
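The new loop above doubles the environment matrix with a Blue/Green variant and then filters it. A minimal sketch of that filtering in isolation; the flags mirror the configuration fields above, and the class and method names are hypothetical:

    import integration.DatabaseEngineDeployment;

    final class BlueGreenMatrixFilter {
        // Mirrors the guards at the top of the new for-loop above.
        static boolean include(DatabaseEngineDeployment deployment,
                               boolean withBlueGreen,
                               boolean excludeBlueGreen,
                               boolean testBlueGreenOnly) {
            if (!withBlueGreen) {
                return !testBlueGreenOnly;        // plain variants are dropped in BG-only runs
            }
            if (excludeBlueGreen && !testBlueGreenOnly) {
                return false;                     // BG variants are opt-in ("exclude-bg" defaults to "true")
            }
            // BG runs target only Aurora clusters and Multi-AZ single instances.
            return deployment == DatabaseEngineDeployment.AURORA
                || deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE;
        }
    }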
software.amazon.awssdk.services.ec2.model.Ec2Exception; +import software.amazon.awssdk.services.ec2.model.IpPermission; +import software.amazon.awssdk.services.ec2.model.IpRange; import software.amazon.awssdk.services.rds.RdsClient; import software.amazon.awssdk.services.rds.RdsClientBuilder; +import software.amazon.awssdk.services.rds.model.ApplyMethod; +import software.amazon.awssdk.services.rds.model.BlueGreenDeployment; +import software.amazon.awssdk.services.rds.model.BlueGreenDeploymentNotFoundException; +import software.amazon.awssdk.services.rds.model.CreateBlueGreenDeploymentRequest; +import software.amazon.awssdk.services.rds.model.CreateBlueGreenDeploymentResponse; +import software.amazon.awssdk.services.rds.model.CreateDbClusterParameterGroupRequest; +import software.amazon.awssdk.services.rds.model.CreateDbClusterParameterGroupResponse; import software.amazon.awssdk.services.rds.model.CreateDbClusterRequest; import software.amazon.awssdk.services.rds.model.CreateDbInstanceRequest; import software.amazon.awssdk.services.rds.model.DBCluster; @@ -60,90 +78,95 @@ import software.amazon.awssdk.services.rds.model.DBEngineVersion; import software.amazon.awssdk.services.rds.model.DBInstance; import software.amazon.awssdk.services.rds.model.DbClusterNotFoundException; +import software.amazon.awssdk.services.rds.model.DbInstanceNotFoundException; +import software.amazon.awssdk.services.rds.model.DeleteBlueGreenDeploymentRequest; +import software.amazon.awssdk.services.rds.model.DeleteBlueGreenDeploymentResponse; +import software.amazon.awssdk.services.rds.model.DeleteDbClusterParameterGroupRequest; import software.amazon.awssdk.services.rds.model.DeleteDbClusterResponse; import software.amazon.awssdk.services.rds.model.DeleteDbInstanceRequest; +import software.amazon.awssdk.services.rds.model.DeleteDbInstanceResponse; +import software.amazon.awssdk.services.rds.model.DescribeBlueGreenDeploymentsResponse; import software.amazon.awssdk.services.rds.model.DescribeDbClustersRequest; import software.amazon.awssdk.services.rds.model.DescribeDbClustersResponse; import software.amazon.awssdk.services.rds.model.DescribeDbEngineVersionsRequest; import software.amazon.awssdk.services.rds.model.DescribeDbEngineVersionsResponse; +import software.amazon.awssdk.services.rds.model.DescribeDbInstancesRequest; import software.amazon.awssdk.services.rds.model.DescribeDbInstancesResponse; import software.amazon.awssdk.services.rds.model.Filter; +import software.amazon.awssdk.services.rds.model.InvalidDbClusterStateException; +import software.amazon.awssdk.services.rds.model.InvalidDbInstanceStateException; +import software.amazon.awssdk.services.rds.model.ModifyDbClusterParameterGroupRequest; +import software.amazon.awssdk.services.rds.model.ModifyDbClusterParameterGroupResponse; +import software.amazon.awssdk.services.rds.model.Parameter; +import software.amazon.awssdk.services.rds.model.PromoteReadReplicaDbClusterRequest; +import software.amazon.awssdk.services.rds.model.PromoteReadReplicaDbClusterResponse; +import software.amazon.awssdk.services.rds.model.PromoteReadReplicaRequest; +import software.amazon.awssdk.services.rds.model.PromoteReadReplicaResponse; +import software.amazon.awssdk.services.rds.model.RdsException; import software.amazon.awssdk.services.rds.model.Tag; import software.amazon.awssdk.services.rds.waiters.RdsWaiter; /** - * Creates and destroys AWS RDS Clusters and Instances. 
To use this functionality the following environment variables + * Provides useful functions for RDS integration testing. To use this functionality the following environment variables * must be defined: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY */ public class AuroraTestUtility { private static final Logger LOGGER = Logger.getLogger(AuroraTestUtility.class.getName()); - - // Default values - private String dbUsername = "my_test_username"; - private String dbPassword = "my_test_password"; - private String dbName = "test"; - private String dbIdentifier = "test-identifier"; - private DatabaseEngineDeployment dbEngineDeployment; - private String dbEngine = "aurora-postgresql"; - private String dbEngineVersion = "13.9"; - private String dbInstanceClass = "db.r5.large"; - private final String storageType = "io1"; - private final int allocatedStorage = 100; - private final int iops = 1000; - private final Region dbRegion; - private final String dbSecGroup = "default"; - private int numOfInstances = 5; - private ArrayList instances = new ArrayList<>(); + private static final String DUPLICATE_IP_ERROR_CODE = "InvalidPermission.Duplicate"; + private static final String DEFAULT_SECURITY_GROUP = "default"; + private static final String DEFAULT_STORAGE_TYPE = "gp3"; + private static final int DEFAULT_IOPS = 64000; + private static final int DEFAULT_ALLOCATED_STORAGE = 400; + private static final int MULTI_AZ_SIZE = 3; private final RdsClient rdsClient; private final Ec2Client ec2Client; - private static final Random rand = new Random(); - - private static final String DUPLICATE_IP_ERROR_CODE = "InvalidPermission.Duplicate"; public AuroraTestUtility( - String region, String rdsEndpoint, String awsAccessKeyId, String awsSecretAccessKey, String awsSessionToken) - throws URISyntaxException { + String region, String rdsEndpoint, String awsAccessKeyId, String awsSecretAccessKey, String awsSessionToken) { this( - getRegionInternal(region), - rdsEndpoint, - StaticCredentialsProvider.create( - StringUtils.isNullOrEmpty(awsSessionToken) - ? AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey) - : AwsSessionCredentials.create(awsAccessKeyId, awsSecretAccessKey, awsSessionToken))); + getRegionInternal(region), + rdsEndpoint, + StaticCredentialsProvider.create( + StringUtils.isNullOrEmpty(awsSessionToken) + ? AwsBasicCredentials.create(awsAccessKeyId, awsSecretAccessKey) + : AwsSessionCredentials.create(awsAccessKeyId, awsSecretAccessKey, awsSessionToken))); } /** - * Initializes an AmazonRDS & AmazonEC2 client. + * Creates a TestUtility instance. As part of the creation, an RdsClient and Ec2Client are initialized. * - * @param region define AWS Regions, refer to + * @param region The AWS region for the cluster(s) the tests will be running against, refer to * Regions, - * Availability Zones, and Local Zones - * @param credentialsProvider Specific AWS credential provider + * href="https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts + * .RegionsAndAvailabilityZones.html"> + * Regions, Availability Zones, and Local Zones + * @param credentialsProvider The AWS credential provider to use to initialize the RdsClient and Ec2Client. 
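For reviewers trying out the reworked constructor, a minimal sketch; the key material and region are placeholders, and the classes used are already imported at the top of this file:

```java
// Placeholder credentials; real runs pull these from the test environment config.
AwsCredentialsProvider credentialsProvider = StaticCredentialsProvider.create(
    AwsBasicCredentials.create("myAccessKeyId", "mySecretAccessKey"));

// An empty rdsEndpoint string skips the endpointOverride branch.
AuroraTestUtility util = new AuroraTestUtility(Region.US_EAST_2, "", credentialsProvider);
```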
*/ - public AuroraTestUtility(Region region, String rdsEndpoint, AwsCredentialsProvider credentialsProvider) - throws URISyntaxException { - dbRegion = region; + public AuroraTestUtility(Region region, String rdsEndpoint, AwsCredentialsProvider credentialsProvider) { final RdsClientBuilder rdsClientBuilder = RdsClient.builder() - .region(dbRegion) - .credentialsProvider(credentialsProvider); + .region(region) + .credentialsProvider(credentialsProvider); if (!StringUtils.isNullOrEmpty(rdsEndpoint)) { - rdsClientBuilder.endpointOverride(new URI(rdsEndpoint)); + try { + rdsClientBuilder.endpointOverride(new URI(rdsEndpoint)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } } rdsClient = rdsClientBuilder.build(); ec2Client = Ec2Client.builder() - .region(dbRegion) - .credentialsProvider(credentialsProvider) - .build(); + .region(region) + .credentialsProvider(credentialsProvider) + .build(); } protected static Region getRegionInternal(String rdsRegion) { Optional regionOptional = - Region.regions().stream().filter(r -> r.id().equalsIgnoreCase(rdsRegion)).findFirst(); + Region.regions().stream().filter(r -> r.id().equalsIgnoreCase(rdsRegion)).findFirst(); if (regionOptional.isPresent()) { return regionOptional.get(); @@ -152,91 +175,186 @@ protected static Region getRegionInternal(String rdsRegion) { } /** - * Creates RDS Cluster/Instances and waits until they are up, and proper IP whitelisting for databases. + * Creates an RDS cluster based on the passed in details. After the cluster is created, this method will wait + * until it is available, adds the current IP address to the default security group, and create a database with the + * given name within the cluster. * - * @param username Master username for access to database - * @param password Master password for access to database - * @param dbName Database name - * @param identifier Database cluster identifier - * @param engine Database engine to use, refer to - * https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Welcome.html - * @param instanceClass instance class, refer to - * https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.DBInstanceClass.html + * @param username the master username for access to the database + * @param password the master password for access to the database + * @param dbName the database to create within the cluster + * @param identifier the cluster identifier + * @param deployment the engine deployment to use + * @param region the region that the cluster should be created in + * @param engine the engine to use, refer to + * CreateDbClusterRequest.engine + * @param instanceClass the instance class, refer to + * Supported instance classes * @param version the database engine's version - * @return An endpoint for one of the instances + * @param numInstances the number of instances to create for the cluster * @throws InterruptedException when clusters have not started after 30 minutes */ - public String createCluster( + public void createCluster( String username, String password, String dbName, String identifier, DatabaseEngineDeployment deployment, + String region, String engine, String instanceClass, String version, - int numOfInstances, - ArrayList instances) + @Nullable String clusterParameterGroupName, + int numInstances) throws InterruptedException { - this.dbUsername = username; - this.dbPassword = password; - this.dbName = dbName; - this.dbIdentifier = identifier; - this.dbEngineDeployment = deployment; - this.dbEngine = engine; - this.dbInstanceClass = instanceClass; - 
this.dbEngineVersion = version; - this.numOfInstances = numOfInstances; - this.instances = instances; - - switch (this.dbEngineDeployment) { + + switch (deployment) { case AURORA: - return createAuroraCluster(); - case RDS_MULTI_AZ: - return createMultiAzCluster(); + createAuroraCluster( + username, password, dbName, identifier, region, engine, instanceClass, + version, clusterParameterGroupName, numInstances); + break; + case RDS_MULTI_AZ_CLUSTER: + if (numInstances != MULTI_AZ_SIZE) { + throw new RuntimeException( + "A multi-az cluster with " + numInstances + " instances was requested, but multi-az clusters must have " + + MULTI_AZ_SIZE + " instances."); + } + createMultiAzCluster( + username, password, dbName, identifier, region, engine, instanceClass, version); + break; default: - throw new UnsupportedOperationException(this.dbEngineDeployment.toString()); + throw new UnsupportedOperationException(deployment.toString()); + } + } + + public String createMultiAzInstance( + String username, + String password, + String dbName, + String identifier, + DatabaseEngineDeployment deployment, + String engine, + String instanceClass, + String version, + ArrayList instances) { + + if (deployment != RDS_MULTI_AZ_INSTANCE) { + throw new UnsupportedOperationException(deployment.toString()); + } + + rdsClient.createDBInstance(CreateDbInstanceRequest.builder() + .dbInstanceIdentifier(identifier) + .publiclyAccessible(true) + .dbName(dbName) + .masterUsername(username) + .masterUserPassword(password) + .enableIAMDatabaseAuthentication(true) + .multiAZ(true) + .engine(engine) + .engineVersion(version) + .dbInstanceClass(instanceClass) + .enablePerformanceInsights(false) + .backupRetentionPeriod(1) + .storageEncrypted(true) + .storageType(DEFAULT_STORAGE_TYPE) + .allocatedStorage(DEFAULT_ALLOCATED_STORAGE) + .iops(DEFAULT_IOPS) + .tags(this.getTag()) + .build()); + + // Wait for all instances to be up + final RdsWaiter waiter = rdsClient.waiter(); + WaiterResponse waiterResponse = + waiter.waitUntilDBInstanceAvailable( + (requestBuilder) -> + requestBuilder.filters( + Filter.builder().name("db-instance-id").values(identifier).build()), + (configurationBuilder) -> configurationBuilder.maxAttempts(240).waitTimeout(Duration.ofMinutes(240))); + + if (waiterResponse.matched().exception().isPresent()) { + deleteMultiAzInstance(identifier, false); + throw new RuntimeException( + "Unable to start AWS RDS Instance after waiting for 240 minutes"); + } + + DescribeDbInstancesResponse dbInstancesResult = waiterResponse.matched().response().orElse(null); + if (dbInstancesResult == null) { + throw new RuntimeException("Unable to get instance details."); + } + + final String endpoint = dbInstancesResult.dbInstances().get(0).endpoint().address(); + final String rdsDomainPrefix = endpoint.substring(endpoint.indexOf('.') + 1); + + for (DBInstance instance : dbInstancesResult.dbInstances()) { + instances.add( + new TestInstanceInfo( + instance.dbInstanceIdentifier(), + instance.endpoint().address(), + instance.endpoint().port())); } + + return rdsDomainPrefix; } /** - * Creates RDS Cluster/Instances and waits until they are up, and proper IP whitelisting for databases. + * Creates an RDS Aurora cluster based on the passed in details. After the cluster is created, this method will wait + * until it is available, adds the current IP address to the default security group, and create a database with the + * given name within the cluster. 
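A usage sketch of the new createCluster signature; every value below is a placeholder, and the null argument opts out of a custom cluster parameter group. Note that RDS_MULTI_AZ_CLUSTER deployments must request exactly MULTI_AZ_SIZE (3) instances or the method throws.

```java
// Hypothetical test values; throws InterruptedException if the cluster
// does not become available in time.
util.createCluster(
    "my_test_username",              // master username
    "my_test_password",              // master password
    "test",                          // database name
    "test-identifier",               // cluster identifier
    DatabaseEngineDeployment.AURORA, // engine deployment
    "us-east-2",                     // region
    "aurora-postgresql",             // engine
    "db.r5.large",                   // instance class
    "16.4",                          // engine version (placeholder)
    null,                            // no custom cluster parameter group
    5);                              // number of instances
```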
* - * @return An endpoint for one of the instances + * @param username the master username for access to the database + * @param password the master password for access to the database + * @param dbName the database to create within the cluster + * @param identifier the cluster identifier + * @param region the region that the cluster should be created in + * @param engine the engine to use, refer to + * CreateDbClusterRequest.engine + * @param instanceClass the instance class, refer to + * Supported instance classes + * @param version the database engine's version + * @param numInstances the number of instances to create for the cluster - * @throws InterruptedException when clusters have not started after 30 minutes + * @throws InterruptedException when clusters have not started after 240 minutes */ - public String createAuroraCluster() throws InterruptedException { - // Create Cluster - final Tag testRunnerTag = Tag.builder().key("env").value("test-runner").build(); - + public void createAuroraCluster( + String username, + String password, + String dbName, + String identifier, + String region, + String engine, + String instanceClass, + String version, + @Nullable String clusterParameterGroupName, + int numInstances) + throws InterruptedException { final CreateDbClusterRequest dbClusterRequest = CreateDbClusterRequest.builder() - .dbClusterIdentifier(dbIdentifier) + .dbClusterIdentifier(identifier) .databaseName(dbName) - .masterUsername(dbUsername) - .masterUserPassword(dbPassword) - .sourceRegion(dbRegion.id()) + .masterUsername(username) + .masterUserPassword(password) + .sourceRegion(region) .enableIAMDatabaseAuthentication(true) - .engine(dbEngine) - .engineVersion(dbEngineVersion) + .engine(engine) + .engineVersion(version) .storageEncrypted(true) - .tags(testRunnerTag) + .tags(this.getTag()) + .dbClusterParameterGroupName(clusterParameterGroupName) .build(); rdsClient.createDBCluster(dbClusterRequest); // Create Instances - for (int i = 1; i <= numOfInstances; i++) { - final String instanceName = dbIdentifier + "-" + i; + for (int i = 1; i <= numInstances; i++) { + final String instanceName = identifier + "-" + i; rdsClient.createDBInstance( CreateDbInstanceRequest.builder() - .dbClusterIdentifier(dbIdentifier) + .dbClusterIdentifier(identifier) .dbInstanceIdentifier(instanceName) - .dbInstanceClass(dbInstanceClass) - .engine(dbEngine) - .engineVersion(dbEngineVersion) + .dbInstanceClass(instanceClass) + .engine(engine) + .engineVersion(version) .publiclyAccessible(true) - .tags(testRunnerTag) + .tags(this.getTag()) .build()); } @@ -246,109 +364,151 @@ public String createAuroraCluster() throws InterruptedException { waiter.waitUntilDBInstanceAvailable( (requestBuilder) -> requestBuilder.filters( - Filter.builder().name("db-cluster-id").values(dbIdentifier).build()), - (configurationBuilder) -> configurationBuilder.waitTimeout(Duration.ofMinutes(30))); + Filter.builder().name("db-cluster-id").values(identifier).build()), + (configurationBuilder) -> configurationBuilder.maxAttempts(480).waitTimeout(Duration.ofMinutes(240))); if (waiterResponse.matched().exception().isPresent()) { - deleteCluster(); + deleteCluster(identifier, DatabaseEngineDeployment.AURORA, false); throw new InterruptedException( - "Unable to start AWS RDS Cluster & Instances after waiting for 30 minutes"); + "Unable to start AWS RDS Cluster & Instances after waiting for 240 minutes"); } - - final DescribeDbInstancesResponse dbInstancesResult = - rdsClient.describeDBInstances( - (builder) -> - builder.filters( - Filter.builder().name("db-cluster-id").values(dbIdentifier).build())); - final String endpoint = dbInstancesResult.dbInstances().get(0).endpoint().address(); - final
String clusterDomainPrefix = endpoint.substring(endpoint.indexOf('.') + 1); - - for (DBInstance instance : dbInstancesResult.dbInstances()) { - this.instances.add( - new TestInstanceInfo( - instance.dbInstanceIdentifier(), - instance.endpoint().address(), - instance.endpoint().port())); - } - - return clusterDomainPrefix; } /** - * Creates RDS Cluster/Instances and waits until they are up, and proper IP whitelisting for databases. + * Creates an RDS multi-az cluster based on the passed in details. After the cluster is created, this method will wait + * until it is available, adds the current IP address to the default security group, and create a database with the + * given name within the cluster. * - * @return An endpoint for one of the instances + * @param username the master username for access to the database + * @param password the master password for access to the database + * @param dbName the database to create within the cluster + * @param identifier the cluster identifier + * @param region the region that the cluster should be created in + * @param engine the engine to use, refer to + * CreateDbClusterRequest.engine + * @param instanceClass the instance class, refer to + * Supported instance classes + * @param version the database engine's version * @throws InterruptedException when clusters have not started after 30 minutes */ - public String createMultiAzCluster() throws InterruptedException { - // Create Cluster - final Tag testRunnerTag = Tag.builder().key("env").value("test-runner").build(); + public void createMultiAzCluster(String username, + String password, + String dbName, + String identifier, + String region, + String engine, + String instanceClass, + String version) + throws InterruptedException { CreateDbClusterRequest.Builder clusterBuilder = CreateDbClusterRequest.builder() - .dbClusterIdentifier(dbIdentifier) + .dbClusterIdentifier(identifier) + .publiclyAccessible(true) .databaseName(dbName) - .masterUsername(dbUsername) - .masterUserPassword(dbPassword) - .sourceRegion(dbRegion.id()) - .engine(dbEngine) - .engineVersion(dbEngineVersion) + .masterUsername(username) + .masterUserPassword(password) + .sourceRegion(region) + .engine(engine) + .engineVersion(version) + .enablePerformanceInsights(false) + .backupRetentionPeriod(1) .storageEncrypted(true) - .tags(testRunnerTag); - - clusterBuilder = - clusterBuilder.allocatedStorage(allocatedStorage) - .dbClusterInstanceClass(dbInstanceClass) - .storageType(storageType) - .iops(iops); + .tags(this.getTag()) + .allocatedStorage(DEFAULT_ALLOCATED_STORAGE) + .dbClusterInstanceClass(instanceClass) + .storageType(DEFAULT_STORAGE_TYPE) + .iops(DEFAULT_IOPS); rdsClient.createDBCluster(clusterBuilder.build()); - // For multi-AZ deployments, the cluster instances are created automatically. - - // Wait for all instances to be up + // For multi-AZ deployments, the cluster instances are created automatically. Wait for all instances to be up. 
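The waiter configuration above pairs maxAttempts(480) with a 240-minute timeout, which lines up if the SDK polls this waiter at its default interval of roughly 30 seconds (an assumption worth double-checking against the SDK version in use). The same pattern works for ad-hoc checks outside these helpers:

```java
// Sketch: block until every instance in the (placeholder) cluster is available.
RdsWaiter waiter = rdsClient.waiter();
WaiterResponse<DescribeDbInstancesResponse> waiterResponse =
    waiter.waitUntilDBInstanceAvailable(
        (requestBuilder) -> requestBuilder.filters(
            Filter.builder().name("db-cluster-id").values("test-identifier").build()),
        (configurationBuilder) -> configurationBuilder.maxAttempts(480).waitTimeout(Duration.ofMinutes(240)));

if (waiterResponse.matched().exception().isPresent()) {
  // Clean up and fail fast rather than leaving half-created resources behind.
}
```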
final RdsWaiter waiter = rdsClient.waiter(); WaiterResponse waiterResponse = waiter.waitUntilDBInstanceAvailable( (requestBuilder) -> requestBuilder.filters( - Filter.builder().name("db-cluster-id").values(dbIdentifier).build()), + Filter.builder().name("db-cluster-id").values(identifier).build()), (configurationBuilder) -> configurationBuilder.waitTimeout(Duration.ofMinutes(30))); if (waiterResponse.matched().exception().isPresent()) { - deleteCluster(); + deleteCluster(identifier, DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER, false); throw new InterruptedException( "Unable to start AWS RDS Cluster & Instances after waiting for 30 minutes"); } + } + public List getDBInstances(String clusterId) { final DescribeDbInstancesResponse dbInstancesResult = rdsClient.describeDBInstances( (builder) -> - builder.filters( - Filter.builder().name("db-cluster-id").values(dbIdentifier).build())); - final String endpoint = dbInstancesResult.dbInstances().get(0).endpoint().address(); - final String clusterDomainPrefix = endpoint.substring(endpoint.indexOf('.') + 1); + builder.filters(Filter.builder().name("db-cluster-id").values(clusterId).build())); + return dbInstancesResult.dbInstances(); + } - for (DBInstance instance : dbInstancesResult.dbInstances()) { - this.instances.add( - new TestInstanceInfo( - instance.dbInstanceIdentifier(), - instance.endpoint().address(), - instance.endpoint().port())); + public void createCustomClusterParameterGroup( + String groupName, String engine, String engineVersion, DatabaseEngine databaseEngine) { + CreateDbClusterParameterGroupResponse response = rdsClient.createDBClusterParameterGroup( + CreateDbClusterParameterGroupRequest.builder() + .dbClusterParameterGroupName(groupName) + .description("Test custom cluster parameter group for BGD.") + .dbParameterGroupFamily(this.getAuroraParameterGroupFamily(engine, engineVersion)) + .build()); + + if (!response.sdkHttpResponse().isSuccessful()) { + throw new RuntimeException("Error creating custom cluster parameter group. " + response.sdkHttpResponse()); } - return clusterDomainPrefix; + ModifyDbClusterParameterGroupResponse response2; + switch (databaseEngine) { + case MYSQL: + response2 = rdsClient.modifyDBClusterParameterGroup( + ModifyDbClusterParameterGroupRequest.builder() + .dbClusterParameterGroupName(groupName) + .parameters(Parameter.builder() + .parameterName("binlog_format") + .parameterValue("ROW") + .applyMethod(ApplyMethod.PENDING_REBOOT) + .build()) + .build()); + break; + case PG: + response2 = rdsClient.modifyDBClusterParameterGroup( + ModifyDbClusterParameterGroupRequest.builder() + .dbClusterParameterGroupName(groupName) + .parameters(Parameter.builder() + .parameterName("rds.logical_replication") + .parameterValue("true") + .applyMethod(ApplyMethod.PENDING_REBOOT) + .build()) + .build()); + break; + default: + throw new UnsupportedOperationException(databaseEngine.toString()); + } + + if (!response2.sdkHttpResponse().isSuccessful()) { + throw new RuntimeException("Error updating parameter. " + response2.sdkHttpResponse()); + } + } + + public void deleteCustomClusterParameterGroup(String groupName) { + rdsClient.deleteDBClusterParameterGroup( + DeleteDbClusterParameterGroupRequest.builder() + .dbClusterParameterGroupName(groupName) + .build() + ); } /** - * Gets public IP. + * Gets the public IP address for the current machine. 
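A teardown-inclusive sketch of the new parameter-group helpers (group name and engine version are placeholders). The parameters they set, binlog_format=ROW for MySQL and rds.logical_replication=true for PostgreSQL, are what the Blue/Green deployment tests rely on:

```java
String groupName = "test-bgd-cluster-params"; // hypothetical
util.createCustomClusterParameterGroup(
    groupName, "aurora-mysql", "8.0.mysql_aurora.3.04.0", DatabaseEngine.MYSQL);
try {
  // Pass groupName as the clusterParameterGroupName argument of createCluster(...).
} finally {
  util.deleteCustomClusterParameterGroup(groupName);
}
```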
* - * @return public IP of user - * @throws UnknownHostException when checkip host isn't available + * @return the public IP address for the current machine + * @throws UnknownHostException when checkip.amazonaws.com isn't available */ public String getPublicIPAddress() throws UnknownHostException { String ip; try { - URL ipChecker = new URL("http://checkip.amazonaws.com"); + URL ipChecker = new URL("https://checkip.amazonaws.com"); BufferedReader reader = new BufferedReader(new InputStreamReader(ipChecker.openStream())); ip = reader.readLine(); } catch (Exception e) { @@ -358,7 +518,9 @@ public String getPublicIPAddress() throws UnknownHostException { } /** - * Authorizes IP to EC2 Security groups for RDS access. + * Adds the given IP address to the default security group for RDS access. + * + * @param ipAddress the IP address to add to the default security group */ public void ec2AuthorizeIP(String ipAddress) { if (StringUtils.isNullOrEmpty(ipAddress)) { @@ -370,14 +532,18 @@ public void ec2AuthorizeIP(String ipAddress) { } try { + IpRange ipRange = IpRange.builder() + .cidrIp(ipAddress + "/32") + .description("Test run at " + Instant.now()) + .build(); + IpPermission ipPermission = IpPermission.builder() + .ipRanges(ipRange) + .ipProtocol("-1") // All protocols + .fromPort(0) // For all ports + .toPort(65535) + .build(); ec2Client.authorizeSecurityGroupIngress( - (builder) -> - builder - .groupName(dbSecGroup) - .cidrIp(ipAddress + "/32") - .ipProtocol("-1") // All protocols - .fromPort(0) // For all ports - .toPort(65535)); + (builder) -> builder.groupName(DEFAULT_SECURITY_GROUP).ipPermissions(ipPermission)); } catch (Ec2Exception exception) { if (!DUPLICATE_IP_ERROR_CODE.equalsIgnoreCase(exception.awsErrorDetails().errorCode())) { throw exception; @@ -390,7 +556,7 @@ private boolean ipExists(String ipAddress) { ec2Client.describeSecurityGroups( (builder) -> builder - .groupNames(dbSecGroup) + .groupNames(DEFAULT_SECURITY_GROUP) .filters( software.amazon.awssdk.services.ec2.model.Filter.builder() .name("ip-permission.cidr") @@ -401,7 +567,9 @@ private boolean ipExists(String ipAddress) { } /** - * De-authorizes IP from EC2 Security groups. + * Removes the given IP address from the default security group. + * + * @param ipAddress the IP address to remove from the default security group. */ public void ec2DeauthorizesIP(String ipAddress) { if (StringUtils.isNullOrEmpty(ipAddress)) { @@ -411,7 +579,7 @@ public void ec2DeauthorizesIP(String ipAddress) { ec2Client.revokeSecurityGroupIngress( (builder) -> builder - .groupName(dbSecGroup) + .groupName(DEFAULT_SECURITY_GROUP) .cidrIp(ipAddress + "/32") .ipProtocol("-1") // All protocols .fromPort(0) // For all ports @@ -422,46 +590,49 @@ public void ec2DeauthorizesIP(String ipAddress) { } /** - * Destroys all instances and clusters. Removes IP from EC2 whitelist. + * Deletes the specified cluster and removes the current IP address from the default security group. * - * @param identifier database identifier to delete + * @param identifier the cluster identifier for the cluster to delete + * @param deployment the engine deployment for the cluster to delete + * @param waitForCompletion if true, wait for cluster completely deleted */ - public void deleteCluster(String identifier) { - dbIdentifier = identifier; - deleteCluster(); - } - - /** - * Destroys all instances and clusters. Removes IP from EC2 whitelist. 
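The three security-group helpers are meant to bracket a test run; a sketch:

```java
String ip = util.getPublicIPAddress(); // resolved via checkip.amazonaws.com
util.ec2AuthorizeIP(ip);               // duplicate-permission errors are swallowed
try {
  // ... run tests against the cluster ...
} finally {
  util.ec2DeauthorizesIP(ip);
}
```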
- */ - public void deleteCluster() { - - switch (this.dbEngineDeployment) { + public void deleteCluster(String identifier, DatabaseEngineDeployment deployment, boolean waitForCompletion) { + switch (deployment) { case AURORA: - this.deleteAuroraCluster(); + this.deleteAuroraCluster(identifier, waitForCompletion); break; - case RDS_MULTI_AZ: - this.deleteMultiAzCluster(); + case RDS_MULTI_AZ_CLUSTER: + this.deleteMultiAzCluster(identifier, waitForCompletion); break; default: - throw new UnsupportedOperationException(this.dbEngineDeployment.toString()); + throw new UnsupportedOperationException(deployment.toString()); } } /** - * Destroys all instances and clusters. + * Deletes the specified Aurora cluster and removes the current IP address from the default security group. + * + * @param identifier the cluster identifier for the cluster to delete + * @param waitForCompletion if true, wait for cluster completely deleted */ - public void deleteAuroraCluster() { + public void deleteAuroraCluster(String identifier, boolean waitForCompletion) { + DBCluster dbCluster = getDBCluster(identifier); + if (dbCluster == null) { + return; + } + List members = dbCluster.dbClusterMembers(); + // Tear down instances - for (int i = 1; i <= numOfInstances; i++) { + for (DBClusterMember member : members) { try { rdsClient.deleteDBInstance( DeleteDbInstanceRequest.builder() - .dbInstanceIdentifier(dbIdentifier + "-" + i) + .dbInstanceIdentifier(member.dbInstanceIdentifier()) .skipFinalSnapshot(true) .build()); } catch (Exception ex) { - LOGGER.finest("Error deleting instance " + dbIdentifier + "-" + i + ". " + ex.getMessage()); + LOGGER.finest("Error deleting instance '" + + member.dbInstanceIdentifier() + "' of Aurora cluster: " + ex.getMessage()); // Ignore this error and continue with other instances } } @@ -471,7 +642,7 @@ public void deleteAuroraCluster() { while (--remainingAttempts > 0) { try { DeleteDbClusterResponse response = rdsClient.deleteDBCluster( - (builder -> builder.skipFinalSnapshot(true).dbClusterIdentifier(dbIdentifier))); + (builder -> builder.skipFinalSnapshot(true).dbClusterIdentifier(identifier))); if (response.sdkHttpResponse().isSuccessful()) { break; } @@ -479,23 +650,44 @@ public void deleteAuroraCluster() { } catch (DbClusterNotFoundException ex) { // ignore + return; + } catch (InvalidDbClusterStateException ex) { + throw new RuntimeException("Error deleting db cluster " + identifier, ex); } catch (Exception ex) { - LOGGER.warning("Error deleting db cluster " + dbIdentifier + ": " + ex); + LOGGER.warning("Error deleting db cluster " + identifier + ": " + ex); + return; + } + } + + if (waitForCompletion) { + final RdsWaiter waiter = rdsClient.waiter(); + WaiterResponse waiterResponse = + waiter.waitUntilDBClusterDeleted( + (requestBuilder) -> + requestBuilder.filters( + Filter.builder().name("db-cluster-id").values(identifier).build()), + (configurationBuilder) -> configurationBuilder.waitTimeout(Duration.ofMinutes(60))); + + if (waiterResponse.matched().exception().isPresent()) { + throw new RuntimeException( + "Unable to delete AWS Aurora Cluster after waiting for 60 minutes"); } } } /** - * Destroys all instances and clusters. + * Deletes the specified multi-az cluster and removes the current IP address from the default security group. 
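A sketch of the reworked teardown entry point; waitForCompletion=true blocks on the SDK's waitUntilDBClusterDeleted waiter for up to 60 minutes:

```java
// Synchronous teardown; passing false fires the delete and returns immediately.
util.deleteCluster("test-identifier", DatabaseEngineDeployment.AURORA, true);
```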
+ * + * @param identifier the cluster identifier for the cluster to delete + * @param waitForCompletion if true, wait for cluster completely deleted */ - public void deleteMultiAzCluster() { - // deleteDBinstance requests are not necessary to delete a multi-az cluster. + public void deleteMultiAzCluster(String identifier, boolean waitForCompletion) { // Tear down cluster int remainingAttempts = 5; while (--remainingAttempts > 0) { try { DeleteDbClusterResponse response = rdsClient.deleteDBCluster( - (builder -> builder.skipFinalSnapshot(true).dbClusterIdentifier(dbIdentifier))); + (builder -> builder.skipFinalSnapshot(true).dbClusterIdentifier(identifier))); if (response.sdkHttpResponse().isSuccessful()) { break; } @@ -503,12 +695,114 @@ public void deleteMultiAzCluster() { } catch (DbClusterNotFoundException ex) { // ignore + return; + } catch (Exception ex) { + LOGGER.warning("Error deleting db cluster " + identifier + ": " + ex); + return; + } + } + + if (waitForCompletion) { + final RdsWaiter waiter = rdsClient.waiter(); + WaiterResponse waiterResponse = + waiter.waitUntilDBClusterDeleted( + (requestBuilder) -> + requestBuilder.filters( + Filter.builder().name("db-cluster-id").values(identifier).build()), + (configurationBuilder) -> configurationBuilder.waitTimeout(Duration.ofMinutes(60))); + + if (waiterResponse.matched().exception().isPresent()) { + throw new RuntimeException( + "Unable to delete RDS MultiAz Cluster after waiting for 60 minutes"); + } + } + } + + public void deleteMultiAzInstance(final String identifier, boolean waitForCompletion) { + // Tear down MultiAz Instance + int remainingAttempts = 5; + while (--remainingAttempts > 0) { + try { + DeleteDbInstanceResponse response = rdsClient.deleteDBInstance( + builder -> builder.skipFinalSnapshot(true).dbInstanceIdentifier(identifier).build()); + if (response.sdkHttpResponse().isSuccessful()) { + break; + } + TimeUnit.SECONDS.sleep(30); + + } catch (InvalidDbInstanceStateException invalidDbInstanceStateException) { + // Instance is already being deleted. + // ignore it + LOGGER.finest("MultiAz Instance " + identifier + " is already being deleted. " + + invalidDbInstanceStateException); + break; + } catch (DbInstanceNotFoundException ex) { + // ignore + LOGGER.warning("Error deleting db MultiAz Instance " + identifier + ". 
Instance not found: " + ex); + break; } catch (Exception ex) { - LOGGER.warning("Error deleting db cluster " + dbIdentifier + ": " + ex); + LOGGER.warning("Error deleting db MultiAz Instance " + identifier + ": " + ex); + } + } + + if (waitForCompletion) { + final RdsWaiter waiter = rdsClient.waiter(); + WaiterResponse waiterResponse = + waiter.waitUntilDBInstanceDeleted( + (requestBuilder) -> + requestBuilder.filters( + Filter.builder().name("db-instance-id").values(identifier).build()), + (configurationBuilder) -> configurationBuilder.waitTimeout(Duration.ofMinutes(60))); + + if (waiterResponse.matched().exception().isPresent()) { + throw new RuntimeException( + "Unable to delete RDS MultiAz Instance after waiting for 60 minutes"); } } } + public void promoteClusterToStandalone(String clusterArn) { + if (StringUtils.isNullOrEmpty(clusterArn)) { + return; + } + + DBCluster clusterInfo = getClusterByArn(clusterArn); + + if (clusterInfo == null || StringUtils.isNullOrEmpty(clusterInfo.replicationSourceIdentifier())) { + return; + } + + PromoteReadReplicaDbClusterResponse response = rdsClient.promoteReadReplicaDBCluster( + PromoteReadReplicaDbClusterRequest.builder().dbClusterIdentifier(clusterInfo.dbClusterIdentifier()).build()); + if (!response.sdkHttpResponse().isSuccessful()) { + LOGGER.warning("Error promoting DB cluster to standalone cluster: " + + response.sdkHttpResponse().statusCode() + + " " + + response.sdkHttpResponse().statusText().orElse("")); + } + } + + public void promoteInstanceToStandalone(String instanceArn) { + if (StringUtils.isNullOrEmpty(instanceArn)) { + return; + } + + DBInstance instanceInfo = getRdsInstanceInfoByArn(instanceArn); + + if (instanceInfo == null || StringUtils.isNullOrEmpty(instanceInfo.readReplicaSourceDBInstanceIdentifier())) { + return; + } + + PromoteReadReplicaResponse response = rdsClient.promoteReadReplica( + PromoteReadReplicaRequest.builder().dbInstanceIdentifier(instanceInfo.dbInstanceIdentifier()).build()); + if (!response.sdkHttpResponse().isSuccessful()) { + LOGGER.warning("Error promoting DB instance to standalone instance: " + + response.sdkHttpResponse().statusCode() + + " " + + response.sdkHttpResponse().statusText().orElse("")); + } + } + public boolean doesClusterExist(final String clusterId) { final DescribeDbClustersRequest request = DescribeDbClustersRequest.builder().dbClusterIdentifier(clusterId).build(); @@ -520,6 +814,17 @@ public boolean doesClusterExist(final String clusterId) { return true; } + public boolean doesInstanceExist(final String instanceId) { + final DescribeDbInstancesRequest request = + DescribeDbInstancesRequest.builder().dbInstanceIdentifier(instanceId).build(); + try { + DescribeDbInstancesResponse response = rdsClient.describeDBInstances(request); + return response.sdkHttpResponse().isSuccessful(); + } catch (DbInstanceNotFoundException ex) { + return false; + } + } + public DBCluster getClusterInfo(final String clusterId) { final DescribeDbClustersRequest request = DescribeDbClustersRequest.builder().dbClusterIdentifier(clusterId).build(); @@ -531,6 +836,43 @@ public DBCluster getClusterInfo(final String clusterId) { return response.dbClusters().get(0); } + public DBCluster getClusterByArn(final String clusterArn) { + final DescribeDbClustersRequest request = + DescribeDbClustersRequest.builder() + .filters(Filter.builder().name("db-cluster-id").values(clusterArn).build()) + .build(); + final DescribeDbClustersResponse response = rdsClient.describeDBClusters(request); + if (!response.hasDbClusters()) { + 
return null; + } + + return response.dbClusters().get(0); + } + + public DBInstance getRdsInstanceInfo(final String instanceId) { + final DescribeDbInstancesRequest request = + DescribeDbInstancesRequest.builder().dbInstanceIdentifier(instanceId).build(); + final DescribeDbInstancesResponse response = rdsClient.describeDBInstances(request); + if (!response.hasDbInstances()) { + throw new RuntimeException("RDS Instance " + instanceId + " not found."); + } + + return response.dbInstances().get(0); + } + + public DBInstance getRdsInstanceInfoByArn(final String instanceArn) { + final DescribeDbInstancesRequest request = + DescribeDbInstancesRequest.builder().filters( + Filter.builder().name("db-instance-id").values(instanceArn).build()) + .build(); + final DescribeDbInstancesResponse response = rdsClient.describeDBInstances(request); + if (!response.hasDbInstances()) { + return null; + } + + return response.dbInstances().get(0); + } + public DatabaseEngine getClusterEngine(final DBCluster cluster) { switch (cluster.engine()) { case "aurora-postgresql": @@ -544,91 +886,153 @@ public DatabaseEngine getClusterEngine(final DBCluster cluster) { } } - public List getClusterInstanceIds(final String clusterId) { - final DescribeDbInstancesResponse dbInstancesResult = - rdsClient.describeDBInstances( - (builder) -> - builder.filters(Filter.builder().name("db-cluster-id").values(clusterId).build())); + public String getDbInstanceClass(TestEnvironmentRequest request) { + switch (request.getDatabaseEngineDeployment()) { + case AURORA: + return request.getFeatures().contains(TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT) + ? "db.r7g.2xlarge" + : "db.r5.large"; + case RDS: + case RDS_MULTI_AZ_INSTANCE: + case RDS_MULTI_AZ_CLUSTER: + return "db.m5d.large"; + default: + throw new NotImplementedException(request.getDatabaseEngineDeployment().toString()); + } + } - List result = new ArrayList<>(); - for (DBInstance instance : dbInstancesResult.dbInstances()) { - result.add( + public DatabaseEngine getRdsInstanceEngine(final DBInstance instance) { + switch (instance.engine()) { + case "postgres": + return DatabaseEngine.PG; + case "mysql": + return DatabaseEngine.MYSQL; + default: + throw new UnsupportedOperationException(instance.engine()); + } + } + + public String getAuroraParameterGroupFamily(String engine, String engineVersion) { + switch (engine) { + case "aurora-postgresql": + return "aurora-postgresql16"; + case "aurora-mysql": + if (StringUtils.isNullOrEmpty(engineVersion) || engineVersion.contains("8.0")) { + return "aurora-mysql8.0"; + } + return "aurora-mysql5.7"; + default: + throw new UnsupportedOperationException(engine); + } + } + + public List getTestInstancesInfo(final String clusterId) { + List dbInstances = getDBInstances(clusterId); + List instancesInfo = new ArrayList<>(); + for (DBInstance dbInstance : dbInstances) { + instancesInfo.add( new TestInstanceInfo( - instance.dbInstanceIdentifier(), - instance.endpoint().address(), - instance.endpoint().port())); + dbInstance.dbInstanceIdentifier(), + dbInstance.endpoint().address(), + dbInstance.endpoint().port())); } - return result; + + return instancesInfo; } public void waitUntilClusterHasRightState(String clusterId) throws InterruptedException { + waitUntilClusterHasRightState(clusterId, "available"); + } + + public void waitUntilClusterHasRightState(String clusterId, String... 
allowedStatuses) throws InterruptedException { String status = getDBCluster(clusterId).status(); - while (!"available".equalsIgnoreCase(status)) { + LOGGER.finest("Cluster status: " + status + ", waiting for status: " + String.join(", ", allowedStatuses)); + final Set allowedStatusSet = Arrays.stream(allowedStatuses) + .map(String::toLowerCase) + .collect(Collectors.toSet()); + final long waitTillNanoTime = System.nanoTime() + TimeUnit.MINUTES.toNanos(15); + while (!allowedStatusSet.contains(status.toLowerCase()) && waitTillNanoTime > System.nanoTime()) { TimeUnit.MILLISECONDS.sleep(1000); - status = getDBCluster(clusterId).status(); + String tmpStatus = getDBCluster(clusterId).status(); + if (!tmpStatus.equalsIgnoreCase(status)) { + LOGGER.finest("Cluster status (waiting): " + tmpStatus); + } + status = tmpStatus; } + LOGGER.finest("Cluster status (after wait): " + status); } public DBCluster getDBCluster(String clusterId) { - final DescribeDbClustersResponse dbClustersResult = - rdsClient.describeDBClusters((builder) -> builder.dbClusterIdentifier(clusterId)); + DescribeDbClustersResponse dbClustersResult = null; + int remainingTries = 5; + while (remainingTries-- > 0) { + try { + dbClustersResult = rdsClient.describeDBClusters((builder) -> builder.dbClusterIdentifier(clusterId)); + break; + } catch (DbClusterNotFoundException ex) { + return null; + } catch (SdkClientException sdkClientException) { + if (remainingTries == 0) { + throw sdkClientException; + } + } + } + + if (dbClustersResult == null) { + fail("Unable to get DB cluster info for cluster with ID " + clusterId); + } + final List dbClusterList = dbClustersResult.dbClusters(); return dbClusterList.get(0); } - public List getAuroraInstanceIds( - DatabaseEngine databaseEngine, String connectionUrl, String userName, String password) - throws SQLException { - - String retrieveTopologySql; - switch (databaseEngine) { - case MYSQL: - retrieveTopologySql = - "SELECT SERVER_ID, SESSION_ID FROM information_schema.replica_host_status " - + "ORDER BY IF(SESSION_ID = 'MASTER_SESSION_ID', 0, 1)"; - break; - case PG: - retrieveTopologySql = - "SELECT SERVER_ID, SESSION_ID FROM aurora_replica_status() " - + "ORDER BY CASE WHEN SESSION_ID = 'MASTER_SESSION_ID' THEN 0 ELSE 1 END"; + public DBInstance getDBInstance(String instanceId) { + DescribeDbInstancesResponse dbInstanceResult = null; + int remainingTries = 5; + while (remainingTries-- > 0) { + try { + dbInstanceResult = rdsClient.describeDBInstances((builder) -> builder.dbInstanceIdentifier(instanceId)); break; - default: - throw new UnsupportedOperationException(databaseEngine.toString()); - } - - ArrayList auroraInstances = new ArrayList<>(); + } catch (SdkClientException sdkClientException) { + if (remainingTries == 0) { + throw sdkClientException; + } - try (final Connection conn = DriverManager.getConnection(connectionUrl, userName, password); - final Statement stmt = conn.createStatement(); - final ResultSet resultSet = stmt.executeQuery(retrieveTopologySql)) { - while (resultSet.next()) { - // Get Instance endpoints - final String hostEndpoint = resultSet.getString("SERVER_ID"); - auroraInstances.add(hostEndpoint); + try { + TimeUnit.SECONDS.sleep(30); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new RuntimeException(ex); + } } } - return auroraInstances; - } - - public Boolean isDBInstanceWriter(String clusterId, String instanceId) { - return getMatchedDBClusterMember(clusterId, instanceId).isClusterWriter(); - } - public DBClusterMember 
getMatchedDBClusterMember(String clusterId, String instanceId) { - final List matchedMemberList = - getDBClusterMemberList(clusterId).stream() - .filter(dbClusterMember -> dbClusterMember.dbInstanceIdentifier().equals(instanceId)) - .collect(Collectors.toList()); - if (matchedMemberList.isEmpty()) { - throw new RuntimeException( - "Cannot find cluster member whose db instance identifier is " + instanceId); + if (dbInstanceResult == null) { + fail("Unable to get DB instance info for instance with ID " + instanceId); } - return matchedMemberList.get(0); + + final List dbClusterList = dbInstanceResult.dbInstances(); + return dbClusterList.get(0); } - public List getDBClusterMemberList(String clusterId) { - final DBCluster dbCluster = getDBCluster(clusterId); - return dbCluster.dbClusterMembers(); + public void waitUntilInstanceHasRightState(String instanceId, String... allowedStatuses) throws InterruptedException { + + String status = getDBInstance(instanceId).dbInstanceStatus(); + LOGGER.finest("Instance " + instanceId + " status: " + status + + ", waiting for status: " + String.join(", ", allowedStatuses)); + final Set allowedStatusSet = Arrays.stream(allowedStatuses) + .map(String::toLowerCase) + .collect(Collectors.toSet()); + final long waitTillNanoTime = System.nanoTime() + TimeUnit.MINUTES.toNanos(15); + while (!allowedStatusSet.contains(status.toLowerCase()) && waitTillNanoTime > System.nanoTime()) { + TimeUnit.MILLISECONDS.sleep(1000); + String tmpStatus = getDBInstance(instanceId).dbInstanceStatus(); + if (!tmpStatus.equalsIgnoreCase(status)) { + LOGGER.finest("Instance " + instanceId + " status (waiting): " + tmpStatus); + } + status = tmpStatus; + } + LOGGER.finest("Instance " + instanceId + " status (after wait): " + status); } public void addAuroraAwsIamUser( @@ -637,7 +1041,8 @@ public void addAuroraAwsIamUser( String userName, String password, String dbUser, - String databaseName) + String databaseName, + boolean useRdsTools) throws SQLException { try (final Connection conn = DriverManager.getConnection(connectionUrl, userName, password); @@ -648,13 +1053,28 @@ public void addAuroraAwsIamUser( stmt.execute("DROP USER IF EXISTS " + dbUser + ";"); stmt.execute( "CREATE USER " + dbUser + " IDENTIFIED WITH AWSAuthenticationPlugin AS 'RDS';"); - stmt.execute("GRANT ALL PRIVILEGES ON " + databaseName + ".* TO '" + dbUser + "'@'%';"); + if (!StringUtils.isNullOrEmpty(databaseName)) { + stmt.execute("GRANT ALL PRIVILEGES ON " + databaseName + ".* TO '" + dbUser + "'@'%';"); + } else { + stmt.execute("GRANT ALL PRIVILEGES ON `%`.* TO '" + dbUser + "'@'%';"); + } + + // BG switchover status needs it. + stmt.execute("GRANT SELECT ON mysql.* TO '" + dbUser + "'@'%';"); break; case PG: stmt.execute("DROP USER IF EXISTS " + dbUser + ";"); stmt.execute("CREATE USER " + dbUser + ";"); stmt.execute("GRANT rds_iam TO " + dbUser + ";"); - stmt.execute("GRANT ALL PRIVILEGES ON DATABASE " + databaseName + " TO " + dbUser + ";"); + if (!StringUtils.isNullOrEmpty(databaseName)) { + stmt.execute("GRANT ALL PRIVILEGES ON DATABASE " + databaseName + " TO " + dbUser + ";"); + } + + if (useRdsTools) { + // BG switchover status needs it. 
+ stmt.execute("GRANT USAGE ON SCHEMA rds_tools TO " + dbUser + ";"); + stmt.execute("GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA rds_tools TO " + dbUser + ";"); + } break; default: throw new UnsupportedOperationException(databaseEngine.toString()); @@ -665,7 +1085,7 @@ public void addAuroraAwsIamUser( public List getEngineVersions(String engine) { final List res = new ArrayList<>(); final DescribeDbEngineVersionsResponse versions = rdsClient.describeDBEngineVersions( - DescribeDbEngineVersionsRequest.builder().engine(engine).build() + DescribeDbEngineVersionsRequest.builder().engine(engine).build() ); for (DBEngineVersion version : versions.dbEngineVersions()) { if (version.engineVersion().contains("limitless")) { @@ -678,18 +1098,197 @@ public List getEngineVersions(String engine) { } public String getLatestVersion(String engine) { - return getEngineVersions(engine) - .stream().min(Comparator.reverseOrder()) - .orElse(null); + return getEngineVersions(engine).stream() + .filter(version -> !version.contains("limitless")) + .max(Comparator.naturalOrder()) + .orElse(null); } - public String getLTSVersion(String engine) { + public String getDefaultVersion(String engine) { final DescribeDbEngineVersionsResponse versions = rdsClient.describeDBEngineVersions( - DescribeDbEngineVersionsRequest.builder().defaultOnly(true).engine(engine).build() + DescribeDbEngineVersionsRequest.builder().defaultOnly(true).engine(engine).build() ); if (!versions.dbEngineVersions().isEmpty()) { return versions.dbEngineVersions().get(0).engineVersion(); } - throw new RuntimeException("Failed to find LTS version"); + throw new RuntimeException("Failed to find default version"); + } + + public String createBlueGreenDeployment(String name, String sourceArn) { + + final String blueGreenName = "bgd-" + name; + + CreateBlueGreenDeploymentResponse response = null; + int count = 10; + while (response == null && count-- > 0) { + try { + response = rdsClient.createBlueGreenDeployment( + CreateBlueGreenDeploymentRequest.builder() + .blueGreenDeploymentName(blueGreenName) + .source(sourceArn) + .tags(this.getTag()) + .build()); + } catch (RdsException ex) { + if (ex.statusCode() != 500 || count == 0) { + throw ex; + } + + LOGGER.finest("Can't send createBlueGreenDeployment request. 
Wait 1min and try again."); + + try { + TimeUnit.MINUTES.sleep(1); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + } + + if (response == null) { + throw new RuntimeException("Can't send createBlueGreenDeployment request."); + } + + if (!response.sdkHttpResponse().isSuccessful()) { + LOGGER.finest(String.format("createBlueGreenDeployment response: %d, %s", + response.sdkHttpResponse().statusCode(), + response.sdkHttpResponse().statusText())); + throw new RuntimeException(response.sdkHttpResponse().statusText().orElse("Unspecified error.")); + } else { + LOGGER.finest("createBlueGreenDeployment request is sent"); + } + + String blueGreenId = response.blueGreenDeployment().blueGreenDeploymentIdentifier(); + + BlueGreenDeployment blueGreenDeployment = getBlueGreenDeployment(blueGreenId); + long end = System.nanoTime() + TimeUnit.MINUTES.toNanos(240); + while ((blueGreenDeployment == null || !blueGreenDeployment.status().equalsIgnoreCase("available")) + && System.nanoTime() < end) { + try { + TimeUnit.SECONDS.sleep(60); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + blueGreenDeployment = getBlueGreenDeployment(blueGreenId); + } + + if (blueGreenDeployment == null || !blueGreenDeployment.status().equalsIgnoreCase("available")) { + throw new RuntimeException("BlueGreen Deployment " + blueGreenId + " isn't available."); + } + + return blueGreenId; + } + + public void waitUntilBlueGreenDeploymentHasRightState(String blueGreenId, String... allowedStatuses) { + + String status = getBlueGreenDeployment(blueGreenId).status(); + LOGGER.finest("BGD status: " + status + ", waiting for status: " + String.join(", ", allowedStatuses)); + final Set allowedStatusSet = Arrays.stream(allowedStatuses) + .map(String::toLowerCase) + .collect(Collectors.toSet()); + final long waitTillNanoTime = System.nanoTime() + TimeUnit.MINUTES.toNanos(15); + while (!allowedStatusSet.contains(status.toLowerCase()) && waitTillNanoTime > System.nanoTime()) { + try { + TimeUnit.MILLISECONDS.sleep(1000); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } + String tmpStatus = getBlueGreenDeployment(blueGreenId).status(); + if (!tmpStatus.equalsIgnoreCase(status)) { + LOGGER.finest("BGD status (waiting): " + tmpStatus); + } + status = tmpStatus; + } + LOGGER.finest("BGD status (after wait): " + status); + + if (!allowedStatusSet.contains(status.toLowerCase())) { + throw new RuntimeException("BlueGreen Deployment " + blueGreenId + " has wrong status."); + } + } + + public boolean doesBlueGreenDeploymentExist(String blueGreenId) { + try { + DescribeBlueGreenDeploymentsResponse response = rdsClient.describeBlueGreenDeployments( + builder -> builder.blueGreenDeploymentIdentifier(blueGreenId)); + return response.blueGreenDeployments() != null && !response.blueGreenDeployments().isEmpty(); + } catch (BlueGreenDeploymentNotFoundException ex) { + LOGGER.finest("blueGreenDeployments not found"); + return false; + } + } + + public BlueGreenDeployment getBlueGreenDeployment(String blueGreenId) { + try { + DescribeBlueGreenDeploymentsResponse response = rdsClient.describeBlueGreenDeployments( + builder -> builder.blueGreenDeploymentIdentifier(blueGreenId)); + if (response.hasBlueGreenDeployments()) { + return response.blueGreenDeployments().get(0); + } + return null; + } catch (BlueGreenDeploymentNotFoundException ex) { + return null; + } + } + + public BlueGreenDeployment getBlueGreenDeploymentBySource(String sourceArn) { + 
try { + DescribeBlueGreenDeploymentsResponse response = rdsClient.describeBlueGreenDeployments( + builder -> builder.filters(f -> f.name("source").values(sourceArn))); + if (!response.blueGreenDeployments().isEmpty()) { + return response.blueGreenDeployments().get(0); + } + return null; + } catch (BlueGreenDeploymentNotFoundException ex) { + return null; + } + } + + public void deleteBlueGreenDeployment(String blueGreenId, boolean waitForCompletion) { + + if (!doesBlueGreenDeploymentExist(blueGreenId)) { + return; + } + + waitUntilBlueGreenDeploymentHasRightState(blueGreenId, "available", "switchover_completed"); + + DeleteBlueGreenDeploymentResponse response = rdsClient.deleteBlueGreenDeployment( + DeleteBlueGreenDeploymentRequest.builder() + .blueGreenDeploymentIdentifier(blueGreenId) + .build()); + + if (!response.sdkHttpResponse().isSuccessful()) { + LOGGER.finest(String.format("deleteBlueGreenDeployment response: %d, %s", + response.sdkHttpResponse().statusCode(), + response.sdkHttpResponse().statusText())); + throw new RuntimeException(response.sdkHttpResponse().statusText().orElse("Unspecified error.")); + } else { + LOGGER.finest("deleteBlueGreenDeployment request is sent"); + } + + if (waitForCompletion) { + long endTimeNano = System.nanoTime() + TimeUnit.MINUTES.toNanos(120); + while (doesBlueGreenDeploymentExist(blueGreenId) && endTimeNano > System.nanoTime()) { + try { + TimeUnit.MINUTES.sleep(1); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + return; + } + } + + if (doesBlueGreenDeploymentExist(blueGreenId)) { + throw new RuntimeException( + "Unable to delete Blue/Green Deployment after waiting for 120 minutes"); + } + } + } + + private Tag getTag() { + ZoneId zoneId = ZoneId.of("America/Los_Angeles"); + ZonedDateTime zdt = Instant.now().atZone(zoneId); + String timeStr = zdt.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss zzz")); + return Tag.builder() + .key("env").value("test-runner") + .key("created").value(timeStr) + .build(); } } diff --git a/tests/integration/host/src/test/java/integration/util/ContainerHelper.java b/tests/integration/host/src/test/java/integration/util/ContainerHelper.java index 48105045..a4d7500e 100644 --- a/tests/integration/host/src/test/java/integration/util/ContainerHelper.java +++ b/tests/integration/host/src/test/java/integration/util/ContainerHelper.java @@ -23,9 +23,10 @@ import com.github.dockerjava.api.command.ExecCreateCmdResponse; import com.github.dockerjava.api.command.InspectContainerResponse; import com.github.dockerjava.api.exception.DockerException; -import integration.DebugEnv; import eu.rekawek.toxiproxy.ToxiproxyClient; +import integration.DebugEnv; import integration.TestInstanceInfo; +import integration.host.TestEnvironmentConfiguration; import java.io.IOException; import java.util.function.Consumer; import java.util.function.Function; @@ -44,14 +45,14 @@ import org.testcontainers.images.builder.dockerfile.DockerfileBuilder; import org.testcontainers.utility.DockerImageName; import org.testcontainers.utility.MountableFile; -import integration.host.TestEnvironmentConfiguration; +import org.testcontainers.utility.TestEnvironment; public class ContainerHelper { private static final String MYSQL_CONTAINER_IMAGE_NAME = "mysql:8.0.36"; private static final String POSTGRES_CONTAINER_IMAGE_NAME = "postgres:latest"; private static final DockerImageName TOXIPROXY_IMAGE = - DockerImageName.parse("shopify/toxiproxy:2.1.4"); + DockerImageName.parse("ghcr.io/shopify/toxiproxy:2.11.0"); private static 
final int PROXY_CONTROL_PORT = 8474; private static final int PROXY_PORT = 8666; @@ -59,32 +60,6 @@ public class ContainerHelper { private static final String XRAY_TELEMETRY_IMAGE_NAME = "amazon/aws-xray-daemon"; private static final String OTLP_TELEMETRY_IMAGE_NAME = "amazon/aws-otel-collector"; - private static final String RETRIEVE_TOPOLOGY_SQL_POSTGRES = - "SELECT SERVER_ID, SESSION_ID FROM aurora_replica_status() " - + "ORDER BY CASE WHEN SESSION_ID = 'MASTER_SESSION_ID' THEN 0 ELSE 1 END"; - private static final String RETRIEVE_TOPOLOGY_SQL_MYSQL = - "SELECT SERVER_ID, SESSION_ID FROM information_schema.replica_host_status " - + "ORDER BY IF(SESSION_ID = 'MASTER_SESSION_ID', 0, 1)"; - private static final String SERVER_ID = "SERVER_ID"; - - public Long runCmd(GenericContainer container, String... cmd) - throws IOException, InterruptedException { - System.out.println("==== Container console feed ==== >>>>"); - Consumer consumer = new ConsoleConsumer(); - Long exitCode = execInContainer(container, consumer, cmd); - System.out.println("==== Container console feed ==== <<<<"); - return exitCode; - } - - public Long runCmdInDirectory(GenericContainer container, String workingDirectory, String... cmd) - throws IOException, InterruptedException { - System.out.println("==== Container console feed ==== >>>>"); - Consumer consumer = new ConsoleConsumer(); - Long exitCode = execInContainer(container, workingDirectory, consumer, cmd); - System.out.println("==== Container console feed ==== <<<<"); - return exitCode; - } - public void runTest(GenericContainer container, String testFolder, String primaryInfo, TestEnvironmentConfiguration config) throws IOException, InterruptedException { System.out.println("==== Container console feed ==== >>>>"); @@ -263,12 +238,6 @@ public T withFixedExposedPort(int hostPort, int containerPort) { .withPrivilegedMode(true); // Required to control Linux core settings like TcpKeepAlive } - protected Long execInContainer( - GenericContainer container, String workingDirectory, Consumer consumer, String... 
command) - throws UnsupportedOperationException, IOException, InterruptedException { - return execInContainer(container.getContainerInfo(), consumer, workingDirectory, command); - } - protected Long execInContainer( GenericContainer container, Consumer consumer, @@ -407,11 +376,6 @@ public FixedExposedPortContainer(final DockerImageName dockerImageName) { super(dockerImageName); } - public T withFixedExposedPort(int hostPort, int containerPort, InternetProtocol protocol) { - super.addFixedExposedPort(hostPort, containerPort, protocol); - return self(); - } - public T withExposedPort(Integer port) { super.addExposedPort(port); return self(); From 64637c9d87a04ed51345c49ed1de1e0de5e90e31 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Mon, 16 Jun 2025 15:13:44 -0700 Subject: [PATCH 12/41] unit test failures wip --- tests/unit/test_reader_failover_handler.py | 12 +++++------- tests/unit/test_writer_failover_handler.py | 18 +++++++----------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/tests/unit/test_reader_failover_handler.py b/tests/unit/test_reader_failover_handler.py index 180a8770..0399178c 100644 --- a/tests/unit/test_reader_failover_handler.py +++ b/tests/unit/test_reader_failover_handler.py @@ -77,7 +77,7 @@ def test_failover(plugin_service_mock, connection_mock, default_properties, defa expected.append(call(current_host.all_aliases, HostAvailability.UNAVAILABLE)) exception = Exception("Test Exception") - def force_connect_side_effect(host_info, properties, timeout_event) -> Connection: + def force_connect_side_effect(host_info, properties) -> Connection: if host_info == success_host: return connection_mock else: @@ -105,11 +105,9 @@ def test_failover_timeout(plugin_service_mock, connection_mock, default_properti props = default_properties current_host = hosts[2] - def force_connect_side_effect(host_info, properties, timeout_event) -> Connection: + def force_connect_side_effect(host_info, properties) -> Connection: # Sleep for 1 second 20 times unless interrupted by timeout for _ in range(0, 20): - if timeout_event.is_set(): - break sleep(1) return connection_mock @@ -158,7 +156,7 @@ def test_get_reader_connection_success(plugin_service_mock, connection_mock, def slow_host = hosts[1] fast_host = hosts[2] - def force_connect_side_effect(host_info, properties, timeout_event) -> Connection: + def force_connect_side_effect(host_info, properties) -> Connection: # we want slow host to take 20 seconds before returning connection if host_info == slow_host: sleep(20) @@ -182,7 +180,7 @@ def test_get_reader_connection_failure(plugin_service_mock, connection_mock, def props = default_properties exception = Exception("Test Exception") - def force_connect_side_effect(host_info, properties, timeout_event) -> Connection: + def force_connect_side_effect(host_info, properties) -> Connection: raise exception plugin_service_mock.force_connect.side_effect = force_connect_side_effect @@ -201,7 +199,7 @@ def test_get_reader_connection_attempts_timeout(plugin_service_mock, connection_ hosts = default_hosts[0:3] props = default_properties - def force_connect_side_effect(host_info, properties, timeout_event) -> Connection: + def force_connect_side_effect(host_info, properties) -> Connection: try: sleep(5) except Exception: diff --git a/tests/unit/test_writer_failover_handler.py b/tests/unit/test_writer_failover_handler.py index 71c8c150..09cff3b0 100644 --- a/tests/unit/test_writer_failover_handler.py +++ b/tests/unit/test_writer_failover_handler.py @@ -117,7 +117,7 @@ def 
test_reconnect_to_writer_task_b_reader_exception( writer_connection_mock, plugin_service_mock, reader_failover_mock, default_properties, writer, topology): exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: return writer_connection_mock else: @@ -158,7 +158,7 @@ def test_reconnect_to_writer_slow_task_b( mock_hosts_property = mocker.PropertyMock(side_effect=chain([topology], cycle([new_topology]))) type(plugin_service_mock).all_hosts = mock_hosts_property - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: return writer_connection_mock elif host_info == new_writer_host: @@ -196,7 +196,7 @@ def test_reconnect_to_writer_task_b_defers( writer, reader_a, reader_b, topology): exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: sleep(5) return writer_connection_mock @@ -236,7 +236,7 @@ def test_connect_to_new_writer_slow_task_a( reader_b, topology, new_topology): exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: sleep(5) return writer_connection_mock @@ -284,7 +284,7 @@ def test_connect_to_new_writer_task_a_defers( exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: return writer_connection_mock elif host_info == reader_a: @@ -333,11 +333,9 @@ def test_failed_to_connect_failover_timeout( reader_b, topology, new_topology): exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, timeout_event) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == writer: for _ in range(0, 30): - if timeout_event.is_set(): - break sleep(1) return writer_connection_mock elif host_info == reader_a: @@ -346,8 +344,6 @@ def force_connect_side_effect(host_info, _, timeout_event) -> Connection: return reader_b_connection_mock elif host_info == new_writer_host: for _ in range(0, 30): - if timeout_event.is_set(): - break sleep(1) return new_writer_connection_mock else: @@ -390,7 +386,7 @@ def test_failed_to_connect_task_a_exception_task_b_writer_exception( default_properties, new_writer_host, writer, reader_a, reader_b, topology, new_topology): exception = Exception("Test Exception") - def force_connect_side_effect(host_info, _, __) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == reader_a: return reader_a_connection_mock elif host_info == reader_b: From f479a09052dc30b875ac14b93a11d85fc9eaaaf4 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Mon, 16 Jun 2025 17:32:16 -0700 Subject: [PATCH 13/41] Fix unit tests --- .../reader_failover_handler.py | 10 +++++-- .../writer_failover_handler.py | 7 +++-- tests/unit/test_reader_failover_handler.py | 29 +++++++------------ tests/unit/test_writer_failover_handler.py | 6 ++-- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/aws_advanced_python_wrapper/reader_failover_handler.py b/aws_advanced_python_wrapper/reader_failover_handler.py index d0bc2978..e20aba45 100644 --- 
a/aws_advanced_python_wrapper/reader_failover_handler.py +++ b/aws_advanced_python_wrapper/reader_failover_handler.py @@ -101,7 +101,8 @@ def failover(self, current_topology: Tuple[HostInfo, ...], current_host: Optiona return ReaderFailoverHandlerImpl.failed_reader_failover_result result: ReaderFailoverResult = ReaderFailoverHandlerImpl.failed_reader_failover_result - with ThreadPoolExecutor(thread_name_prefix="ReaderFailoverHandlerExecutor") as executor: + executor = ThreadPoolExecutor(thread_name_prefix="ReaderFailoverHandlerExecutor") + try: future = executor.submit(self._internal_failover_task, current_topology, current_host) try: @@ -110,6 +111,8 @@ def failover(self, current_topology: Tuple[HostInfo, ...], current_host: Optiona result = ReaderFailoverHandlerImpl.failed_reader_failover_result except TimeoutError: self._timeout_event.set() + finally: + executor.shutdown(wait=False) return result @@ -171,7 +174,8 @@ def _get_connection_from_host_group(self, hosts: Tuple[HostInfo, ...]) -> Reader return ReaderFailoverHandlerImpl.failed_reader_failover_result def _get_result_from_next_task_batch(self, hosts: Tuple[HostInfo, ...], i: int) -> ReaderFailoverResult: - with ThreadPoolExecutor(thread_name_prefix="ReaderFailoverHandlerRetrieveResultsExecutor") as executor: + executor = ThreadPoolExecutor(thread_name_prefix="ReaderFailoverHandlerExecutor") + try: futures = [executor.submit(self.attempt_connection, hosts[i])] if i + 1 < len(hosts): futures.append(executor.submit(self.attempt_connection, hosts[i + 1])) @@ -186,6 +190,8 @@ def _get_result_from_next_task_batch(self, hosts: Tuple[HostInfo, ...], i: int) self._timeout_event.set() finally: self._timeout_event.set() + finally: + executor.shutdown(wait=False) return ReaderFailoverHandlerImpl.failed_reader_failover_result diff --git a/aws_advanced_python_wrapper/writer_failover_handler.py b/aws_advanced_python_wrapper/writer_failover_handler.py index 51139d81..25b70194 100644 --- a/aws_advanced_python_wrapper/writer_failover_handler.py +++ b/aws_advanced_python_wrapper/writer_failover_handler.py @@ -118,23 +118,24 @@ def get_result_from_future(self, current_topology: Tuple[HostInfo, ...]) -> Writ if writer_host is not None: self._plugin_service.set_availability(writer_host.as_aliases(), HostAvailability.UNAVAILABLE) - with ThreadPoolExecutor(thread_name_prefix="WriterFailoverHandlerExecutor") as executor: + executor = ThreadPoolExecutor(thread_name_prefix="WriterFailoverHandlerExecutor") + try: try: futures = [executor.submit(self.reconnect_to_writer, writer_host), executor.submit(self.wait_for_new_writer, current_topology, writer_host)] for future in as_completed(futures, timeout=self._max_failover_timeout_sec): result = future.result() if result.is_connected: - executor.shutdown(wait=False) self.log_task_success(result) return result if result.exception is not None: - executor.shutdown(wait=False) return result except TimeoutError: self._timeout_event.set() finally: self._timeout_event.set() + finally: + executor.shutdown(wait=False) return WriterFailoverHandlerImpl.failed_writer_failover_result diff --git a/tests/unit/test_reader_failover_handler.py b/tests/unit/test_reader_failover_handler.py index 0399178c..584d1392 100644 --- a/tests/unit/test_reader_failover_handler.py +++ b/tests/unit/test_reader_failover_handler.py @@ -50,12 +50,12 @@ def default_properties(): @pytest.fixture def default_hosts(): - return [HostInfo("writer", 1234, HostRole.WRITER, HostAvailability.AVAILABLE), + return (HostInfo("writer", 1234, HostRole.WRITER, 
HostAvailability.AVAILABLE), HostInfo("reader1", 1234, HostRole.READER, HostAvailability.AVAILABLE), HostInfo("reader2", 1234, HostRole.READER, HostAvailability.AVAILABLE), HostInfo("reader3", 1234, HostRole.READER, HostAvailability.AVAILABLE), HostInfo("reader4", 1234, HostRole.READER, HostAvailability.AVAILABLE), - HostInfo("reader5", 1234, HostRole.READER, HostAvailability.AVAILABLE)] + HostInfo("reader5", 1234, HostRole.READER, HostAvailability.AVAILABLE)) @pytest.fixture @@ -64,7 +64,7 @@ def set_available_count_for_host(): def test_failover(plugin_service_mock, connection_mock, default_properties, default_hosts): - hosts = default_hosts.copy() + hosts = tuple(default_hosts) props = default_properties current_host = hosts[2] success_host = hosts[4] @@ -77,7 +77,7 @@ def test_failover(plugin_service_mock, connection_mock, default_properties, defa expected.append(call(current_host.all_aliases, HostAvailability.UNAVAILABLE)) exception = Exception("Test Exception") - def force_connect_side_effect(host_info, properties) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: if host_info == success_host: return connection_mock else: @@ -105,10 +105,8 @@ def test_failover_timeout(plugin_service_mock, connection_mock, default_properti props = default_properties current_host = hosts[2] - def force_connect_side_effect(host_info, properties) -> Connection: - # Sleep for 1 second 20 times unless interrupted by timeout - for _ in range(0, 20): - sleep(1) + def force_connect_side_effect(_, __) -> Connection: + sleep(20) # The failover handler should hit the max timeout before we hit the 20-second wait and return early. return connection_mock plugin_service_mock.force_connect.side_effect = force_connect_side_effect @@ -134,17 +132,12 @@ def force_connect_side_effect(host_info, properties) -> Connection: assert duration < 6.1 -def test_failover_null_or_empty_host_list(plugin_service_mock, connection_mock, default_properties, default_hosts): +def test_failover_empty_host_list(plugin_service_mock, connection_mock, default_properties, default_hosts): props = default_properties target: ReaderFailoverHandler = ReaderFailoverHandlerImpl(plugin_service_mock, props) current_host: HostInfo = HostInfo("writer", 1234) - result = target.failover(None, current_host) - assert not result.is_connected - assert result.connection is None - assert result.new_host is None - - result = target.failover([], current_host) + result = target.failover((), current_host) assert not result.is_connected assert result.connection is None assert result.new_host is None @@ -156,7 +149,7 @@ def test_get_reader_connection_success(plugin_service_mock, connection_mock, def slow_host = hosts[1] fast_host = hosts[2] - def force_connect_side_effect(host_info, properties) -> Connection: + def force_connect_side_effect(host_info, _) -> Connection: # we want slow host to take 20 seconds before returning connection if host_info == slow_host: sleep(20) @@ -180,7 +173,7 @@ def test_get_reader_connection_failure(plugin_service_mock, connection_mock, def props = default_properties exception = Exception("Test Exception") - def force_connect_side_effect(host_info, properties) -> Connection: + def force_connect_side_effect(_, __) -> Connection: raise exception plugin_service_mock.force_connect.side_effect = force_connect_side_effect @@ -199,7 +192,7 @@ def test_get_reader_connection_attempts_timeout(plugin_service_mock, connection_ hosts = default_hosts[0:3] props = default_properties - def force_connect_side_effect(host_info, 
properties) -> Connection: + def force_connect_side_effect(_, __) -> Connection: try: sleep(5) except Exception: diff --git a/tests/unit/test_writer_failover_handler.py b/tests/unit/test_writer_failover_handler.py index 09cff3b0..325db49a 100644 --- a/tests/unit/test_writer_failover_handler.py +++ b/tests/unit/test_writer_failover_handler.py @@ -97,12 +97,12 @@ def reader_b(): @pytest.fixture def topology(writer, reader_a, reader_b): - return [writer, reader_a, reader_b] + return tuple([writer, reader_a, reader_b]) @pytest.fixture def new_topology(new_writer_host, reader_a, reader_b): - return [new_writer_host, reader_a, reader_b] + return tuple([new_writer_host, reader_a, reader_b]) @pytest.fixture(autouse=True) @@ -155,7 +155,7 @@ def test_reconnect_to_writer_slow_task_b( expected = [call(writer.as_aliases(), HostAvailability.UNAVAILABLE), call(writer.as_aliases(), HostAvailability.AVAILABLE)] - mock_hosts_property = mocker.PropertyMock(side_effect=chain([topology], cycle([new_topology]))) + mock_hosts_property = mocker.PropertyMock(side_effect=chain((topology,), cycle((new_topology,)))) type(plugin_service_mock).all_hosts = mock_hosts_property def force_connect_side_effect(host_info, _) -> Connection: From f3932259d54583385e5b8eccd7aa5469117f6aff Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 17 Jun 2025 09:34:12 -0700 Subject: [PATCH 14/41] Attempt to fix test_verify_urls --- tests/unit/test_verify_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_verify_links.py b/tests/unit/test_verify_links.py index 97dcba41..6be8ae38 100644 --- a/tests/unit/test_verify_links.py +++ b/tests/unit/test_verify_links.py @@ -68,4 +68,4 @@ def test_verify_urls(urls_list: list): response = request("GET", url) assert "jdbc" not in url - assert response.status_code == 200 + assert response.status_code in [200, 202], f"URL {url} returned status code {response.status_code}" From fd493b3b8aa44e4063eec0e924640f3dea7fce8d Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 17 Jun 2025 09:52:44 -0700 Subject: [PATCH 15/41] integ-tests wip --- tests/integration/container/conftest.py | 2 +- tests/integration/container/test_aurora_failover.py | 6 ++++-- tests/integration/container/test_basic_connectivity.py | 6 ++++-- tests/integration/container/test_basic_functionality.py | 4 +++- tests/integration/container/test_custom_endpoint.py | 4 +++- tests/integration/container/test_iam_authentication.py | 4 +++- tests/integration/container/test_read_write_splitting.py | 8 ++++++-- .../container/utils/database_engine_deployment.py | 3 ++- tests/integration/container/utils/rds_test_utility.py | 6 +++--- .../container/utils/test_environment_features.py | 1 + .../integration/container/utils/test_environment_info.py | 9 +++++++++ 11 files changed, 39 insertions(+), 14 deletions(-) diff --git a/tests/integration/container/conftest.py b/tests/integration/container/conftest.py index 8bf68ece..1ebfdd7a 100644 --- a/tests/integration/container/conftest.py +++ b/tests/integration/container/conftest.py @@ -84,7 +84,7 @@ def pytest_runtest_setup(item): ProxyHelper.enable_all_connectivity() deployment = request.get_database_engine_deployment() - if DatabaseEngineDeployment.AURORA == deployment or DatabaseEngineDeployment.RDS_MULTI_AZ == deployment: + if DatabaseEngineDeployment.AURORA == deployment or DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment: rds_utility = RdsTestUtility(info.get_region(), info.get_rds_endpoint()) 
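+        # Wait until the cluster reports 'available' so each test starts against a healthy cluster.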
rds_utility.wait_until_cluster_has_desired_status(info.get_db_name(), "available") diff --git a/tests/integration/container/test_aurora_failover.py b/tests/integration/container/test_aurora_failover.py index 1a303e33..e40e18ff 100644 --- a/tests/integration/container/test_aurora_failover.py +++ b/tests/integration/container/test_aurora_failover.py @@ -41,8 +41,10 @@ @enable_on_num_instances(min_instances=2) -@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ]) -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_CLUSTER]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestAuroraFailover: IDLE_CONNECTIONS_NUM: int = 5 logger = Logger(__name__) diff --git a/tests/integration/container/test_basic_connectivity.py b/tests/integration/container/test_basic_connectivity.py index ef03614e..77aa166a 100644 --- a/tests/integration/container/test_basic_connectivity.py +++ b/tests/integration/container/test_basic_connectivity.py @@ -36,7 +36,9 @@ from .utils.test_environment_features import TestEnvironmentFeatures -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestBasicConnectivity: @pytest.fixture(scope='class') @@ -125,7 +127,7 @@ def test_proxied_wrapper_connection_failed( assert True @enable_on_num_instances(min_instances=2) - @enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ]) + @enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_CLUSTER]) @enable_on_features([TestEnvironmentFeatures.ABORT_CONNECTION_SUPPORTED]) def test_wrapper_connection_reader_cluster_with_efm_enabled(self, test_driver: TestDriver, conn_utils): target_driver_connect = DriverHelper.get_connect_func(test_driver) diff --git a/tests/integration/container/test_basic_functionality.py b/tests/integration/container/test_basic_functionality.py index 34f66c62..005fcb6c 100644 --- a/tests/integration/container/test_basic_functionality.py +++ b/tests/integration/container/test_basic_functionality.py @@ -46,7 +46,9 @@ from .utils.test_environment_features import TestEnvironmentFeatures -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestBasicFunctionality: @pytest.fixture(scope='class') diff --git a/tests/integration/container/test_custom_endpoint.py b/tests/integration/container/test_custom_endpoint.py index ee33bcfa..57162437 100644 --- a/tests/integration/container/test_custom_endpoint.py +++ b/tests/integration/container/test_custom_endpoint.py @@ -45,7 +45,9 @@ @enable_on_num_instances(min_instances=3) @enable_on_deployments([DatabaseEngineDeployment.AURORA]) -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + 
TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestCustomEndpoint: logger: ClassVar[Logger] = Logger(__name__) endpoint_id: ClassVar[str] = f"test-endpoint-1-{uuid4()}" diff --git a/tests/integration/container/test_iam_authentication.py b/tests/integration/container/test_iam_authentication.py index 0e4e2e01..4bc6ee3d 100644 --- a/tests/integration/container/test_iam_authentication.py +++ b/tests/integration/container/test_iam_authentication.py @@ -39,7 +39,9 @@ @enable_on_features([TestEnvironmentFeatures.IAM]) -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestAwsIamAuthentication: @pytest.fixture(scope='class') diff --git a/tests/integration/container/test_read_write_splitting.py b/tests/integration/container/test_read_write_splitting.py index 86ad049a..dc2d00af 100644 --- a/tests/integration/container/test_read_write_splitting.py +++ b/tests/integration/container/test_read_write_splitting.py @@ -42,8 +42,12 @@ @enable_on_num_instances(min_instances=2) -@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ]) -@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.PERFORMANCE]) +@enable_on_deployments([DatabaseEngineDeployment.AURORA, + DatabaseEngineDeployment.MULTI_AZ_CLUSTER, + DatabaseEngineDeployment.MULTI_AZ_INSTANCE]) +@disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, + TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, + TestEnvironmentFeatures.PERFORMANCE]) class TestReadWriteSplitting: @pytest.fixture(scope='class') def rds_utils(self): diff --git a/tests/integration/container/utils/database_engine_deployment.py b/tests/integration/container/utils/database_engine_deployment.py index 3f542745..38b70aed 100644 --- a/tests/integration/container/utils/database_engine_deployment.py +++ b/tests/integration/container/utils/database_engine_deployment.py @@ -18,5 +18,6 @@ class DatabaseEngineDeployment(str, Enum): DOCKER = "DOCKER" RDS = "RDS" - RDS_MULTI_AZ = "RDS_MULTI_AZ" + MULTI_AZ_CLUSTER = "MULTI_AZ_CLUSTER" + MULTI_AZ_INSTANCE = "MULTI_AZ_INSTANCE" AURORA = "AURORA" diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index e6f0d320..b49bd7fe 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -133,7 +133,7 @@ def failover_cluster_and_wait_until_writer_changed( cluster_id: Optional[str] = None, target_id: Optional[str] = None) -> None: deployment = TestEnvironment.get_current().get_deployment() - if DatabaseEngineDeployment.RDS_MULTI_AZ == deployment and target_id is not None: + if DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment and target_id is not None: raise Exception(Messages.get_formatted("RdsTestUtility.FailoverToTargetNotSupported", target_id, deployment)) start = perf_counter_ns() @@ -227,7 +227,7 @@ def query_instance_id( if DatabaseEngineDeployment.AURORA == database_deployment: return self._query_aurora_instance_id(conn, database_engine) - elif DatabaseEngineDeployment.RDS_MULTI_AZ == database_deployment: + elif DatabaseEngineDeployment.MULTI_AZ_CLUSTER == database_deployment: return self._query_multi_az_instance_id(conn, database_engine) 
else: raise RuntimeError(Messages.get_formatted( @@ -291,7 +291,7 @@ def get_instance_ids(self) -> List[str]: deployment: DatabaseEngineDeployment = test_environment.get_deployment() if DatabaseEngineDeployment.AURORA == deployment: return self._get_aurora_instance_ids() - elif DatabaseEngineDeployment.RDS_MULTI_AZ == deployment: + elif DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment: return self._get_multi_az_instance_ids() else: raise RuntimeError("RdsTestUtility.MethodNotSupportedForDeployment", "get_instance_ids", deployment) diff --git a/tests/integration/container/utils/test_environment_features.py b/tests/integration/container/utils/test_environment_features.py index dfbb7fd9..ec42d197 100644 --- a/tests/integration/container/utils/test_environment_features.py +++ b/tests/integration/container/utils/test_environment_features.py @@ -26,6 +26,7 @@ class TestEnvironmentFeatures(Enum): AWS_CREDENTIALS_ENABLED = "AWS_CREDENTIALS_ENABLED" PERFORMANCE = "PERFORMANCE" RUN_AUTOSCALING_TESTS_ONLY = "RUN_AUTOSCALING_TESTS_ONLY" + BLUE_GREEN_DEPLOYMENT = "BLUE_GREEN_DEPLOYMENT" SKIP_MYSQL_DRIVER_TESTS = "SKIP_MYSQL_DRIVER_TESTS" SKIP_PG_DRIVER_TESTS = "SKIP_PG_DRIVER_TESTS" TELEMETRY_TRACES_ENABLED = "TELEMETRY_TRACES_ENABLED" diff --git a/tests/integration/container/utils/test_environment_info.py b/tests/integration/container/utils/test_environment_info.py index 31c2500b..14afdbcb 100644 --- a/tests/integration/container/utils/test_environment_info.py +++ b/tests/integration/container/utils/test_environment_info.py @@ -107,6 +107,15 @@ def get_db_name(self) -> str: def get_iam_user_name(self) -> str: return self._iam_user_name + def get_bg_deployment_id(self) -> str: + return self._bg_deployment_id + + def get_cluster_parameter_group(self) -> str: + return self._cluster_parameter_group + + def get_random_base(self) -> str: + return self._random_base + def get_traces_telemetry_info(self) -> TestTelemetryInfo: return self._traces_telemetry_info From 476d3fde8ab612366a5c04c8e1fd64cd09779d7c Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 17 Jun 2025 12:17:15 -0700 Subject: [PATCH 16/41] Attempt to fix test_reader_failover_handler#test_failover --- tests/unit/test_reader_failover_handler.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_reader_failover_handler.py b/tests/unit/test_reader_failover_handler.py index 584d1392..191fe1bb 100644 --- a/tests/unit/test_reader_failover_handler.py +++ b/tests/unit/test_reader_failover_handler.py @@ -64,6 +64,13 @@ def set_available_count_for_host(): def test_failover(plugin_service_mock, connection_mock, default_properties, default_hosts): + """ + original host list: [active writer, active reader, current connection (reader), active + reader, down reader, active reader] + priority order by index (the subsets will be shuffled): [[1, 3, 5], 0, [2, 4]] + connection attempts are made in pairs using the above list + expected test result: successful connection for host at index 4 + """ hosts = tuple(default_hosts) props = default_properties current_host = hosts[2] @@ -73,8 +80,6 @@ def test_failover(plugin_service_mock, connection_mock, default_properties, defa else call(x.all_aliases, HostAvailability.AVAILABLE) for x in hosts] - # current host should be called twice - expected.append(call(current_host.all_aliases, HostAvailability.UNAVAILABLE)) exception = Exception("Test Exception") def force_connect_side_effect(host_info, _) -> Connection: From 688a1a550b37a0cd59c8b6bedc7566152c0f207d Mon Sep 17 
00:00:00 2001 From: aaron-congo Date: Tue, 17 Jun 2025 14:32:09 -0700 Subject: [PATCH 17/41] Fix build --- aws_advanced_python_wrapper/blue_green_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index a91a2e83..6b5ec4c3 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -1362,7 +1362,6 @@ def _update_corresponding_nodes(self): for blue_host_info in sorted_blue_readers: self._corresponding_nodes.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) - if source_status.host_names and target_status.host_names: blue_hosts = source_status.host_names green_hosts = target_status.host_names From 83dc3b7b594d0362be3dec09f018c8ffa2789e32 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 20 Jun 2025 10:00:06 -0700 Subject: [PATCH 18/41] Add blue/green deployment tests --- ...rora_initial_connection_strategy_plugin.py | 7 +- .../blue_green_plugin.py | 12 +- .../database_dialect.py | 2 +- aws_advanced_python_wrapper/plugin_service.py | 24 +- .../utils/concurrent.py | 24 +- aws_advanced_python_wrapper/wrapper.py | 8 +- poetry.lock | 490 ++++++- pyproject.toml | 2 + .../container/test_blue_green_deployment.py | 1249 +++++++++++++++++ .../container/utils/rds_test_utility.py | 73 +- tests/unit/test_dialect.py | 8 +- tests/unit/test_secrets_manager_plugin.py | 27 +- 12 files changed, 1876 insertions(+), 50 deletions(-) create mode 100644 tests/integration/container/test_blue_green_deployment.py diff --git a/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py b/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py index 1cedf303..7e56daaf 100644 --- a/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py +++ b/aws_advanced_python_wrapper/aurora_initial_connection_strategy_plugin.py @@ -86,7 +86,7 @@ def _get_verified_writer_connection(self, props: Properties, is_initial_connecti self._plugin_service.force_refresh_host_list(writer_candidate_conn) writer_candidate = self._plugin_service.identify_connection(writer_candidate_conn) - if writer_candidate is not None and writer_candidate.role != HostRole.WRITER: + if writer_candidate is None or writer_candidate.role != HostRole.WRITER: self._close_connection(writer_candidate_conn) self._delay(retry_delay_ms) continue @@ -133,6 +133,11 @@ def _get_verified_reader_connection(self, props: Properties, is_initial_connecti self._plugin_service.force_refresh_host_list(reader_candidate_conn) reader_candidate = self._plugin_service.identify_connection(reader_candidate_conn) + if reader_candidate is None: + self._close_connection(reader_candidate_conn) + self._delay(retry_delay_ms) + continue + if reader_candidate is not None and reader_candidate.role != HostRole.READER: if self._has_no_readers(): # Cluster has no readers. Simulate Aurora reader cluster endpoint logic and return the current writer connection. diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 6b5ec4c3..e98fb2d7 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -767,6 +767,16 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: if self._start_time_ns.get() > 0: self._end_time_ns.compare_and_set(0, perf_counter_ns()) + # For testing purposes only. 
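+    # Reports how long execution has been held, in nanoseconds: 0 if the hold
+    # never started, the elapsed time so far if it is still in progress, or the
+    # final duration once the hold has ended.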
+ def get_hold_time_ns(self) -> int: + if self._start_time_ns.get() == 0: + return 0 + + if self._end_time_ns.get() == 0: + return perf_counter_ns() - self._start_time_ns.get() + else: + return self._end_time_ns.get() - self._start_time_ns.get() + class BlueGreenPluginFactory(PluginFactory): def get_instance(self, plugin_service: PluginService, props: Properties) -> Plugin: @@ -1847,7 +1857,7 @@ def _log_switchover_final_summary(self): sorted_phase_entries = sorted(self._phase_times_ns.items(), key=lambda entry: entry[1].timestamp_ns) phase_time_lines = [ f"{entry[1].date_time:>28s} " - f"{'' if time_zero is None else (entry[1].timestamp_ns - time_zero.timestamp_ns) / 1_000_000:>18s} ms " + f"{'' if time_zero is None else (entry[1].timestamp_ns - time_zero.timestamp_ns) // 1_000_000:>18s} ms " f"{entry[0]:>31s}" for entry in sorted_phase_entries ] phase_times_str = "\n".join(phase_time_lines) diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py index 56f5a91c..48af73dd 100644 --- a/aws_advanced_python_wrapper/database_dialect.py +++ b/aws_advanced_python_wrapper/database_dialect.py @@ -283,7 +283,7 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: return False if "source distribution" != record[1].lower(): - return True + return False with closing(conn.cursor()) as cursor: cursor.execute("SHOW VARIABLES LIKE 'report_host'") diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 1ef46540..be4b49d6 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -112,7 +112,8 @@ def plugin_manager(self, value): self._plugin_manager = value -T = TypeVar('T') +StatusType = TypeVar('StatusType') +UnwrapType = TypeVar('UnwrapType') class PluginService(ExceptionHandler, Protocol): @@ -292,11 +293,11 @@ def is_plugin_in_use(self, plugin_class: Type[Plugin]): ... @abstractmethod - def set_status(self, clazz: Type[T], status: Optional[T], key: str): + def set_status(self, clazz: Type[StatusType], status: Optional[StatusType], key: str): ... @abstractmethod - def get_status(self, clazz: Type[T], key: str) -> Optional[T]: + def get_status(self, clazz: Type[StatusType], key: str) -> Optional[StatusType]: ... @@ -677,18 +678,18 @@ def release_resources(self): if host_list_provider is not None and isinstance(host_list_provider, CanReleaseResources): host_list_provider.release_resources() - def set_status(self, clazz: Type[T], status: Optional[T], key: str): + def set_status(self, clazz: Type[StatusType], status: Optional[StatusType], key: str): cache_key = self._get_status_cache_key(clazz, key) if status is None: self._status_cache.remove(cache_key) else: self._status_cache.put(cache_key, status, PluginServiceImpl._STATUS_CACHE_EXPIRATION_NANO) - def _get_status_cache_key(self, clazz: Type[T], key: str) -> str: + def _get_status_cache_key(self, clazz: Type[StatusType], key: str) -> str: key_str = "" if key is None else key.strip().lower() return f"{key_str}::{clazz.__name__}" - def get_status(self, clazz: Type[T], key: str) -> Optional[T]: + def get_status(self, clazz: Type[StatusType], key: str) -> Optional[StatusType]: cache_key = self._get_status_cache_key(clazz, key) status = PluginServiceImpl._status_cache.get(cache_key) if status is None: @@ -1073,6 +1074,17 @@ def is_plugin_in_use(self, plugin_class: Type[Plugin]) -> bool: return False + # For testing purposes only. 
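+    # Returns the first plugin in the chain that is an instance of unwrap_class,
+    # or None if no such plugin is registered. For example, a test might call
+    # plugin_manager._unwrap(BlueGreenPlugin) to inspect that plugin's internal state.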
+ def _unwrap(self, unwrap_class: Type[UnwrapType]) -> Optional[UnwrapType]: + if len(self._plugins) < 1: + return None + + for plugin in self._plugins: + if isinstance(plugin, unwrap_class): + return plugin + + return None + def release_resources(self): """ Allows all connection plugins a chance to clean up any dangling resources diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index cf104494..1c43a685 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: from typing import ItemsView -from threading import Lock, RLock +from threading import Condition, Lock, RLock from typing import Callable, Generic, KeysView, List, Optional, TypeVar K = TypeVar('K') @@ -34,6 +34,9 @@ def __init__(self): def __len__(self): return len(self._dict) + def __contains__(self, key): + return key in self._dict + def get(self, key: K, default_value: Optional[V] = None) -> Optional[V]: return self._dict.get(key, default_value) @@ -136,3 +139,22 @@ def add(self, item: V): def remove(self, item: V): with self._lock: self._set.remove(item) + + +class CountDownLatch: + def __init__(self, count=1): + self.count = count + self.condition = Condition() + + def count_down(self): + with self.condition: + if self.count > 0: + self.count -= 1 + if self.count == 0: + self.condition.notify_all() + + def wait_sec(self, timeout_sec=None): + with self.condition: + if self.count > 0: + return self.condition.wait(timeout_sec) + return True diff --git a/aws_advanced_python_wrapper/wrapper.py b/aws_advanced_python_wrapper/wrapper.py index 7f6681fb..e04d293f 100644 --- a/aws_advanced_python_wrapper/wrapper.py +++ b/aws_advanced_python_wrapper/wrapper.py @@ -15,7 +15,7 @@ from __future__ import annotations from typing import (TYPE_CHECKING, Any, Callable, Iterator, List, Optional, - Union) + Type, TypeVar, Union) if TYPE_CHECKING: from aws_advanced_python_wrapper.host_list_provider import HostListProviderService @@ -40,6 +40,8 @@ logger = Logger(__name__) +UnwrapType = TypeVar('UnwrapType') + class AwsWrapperConnection(Connection, CanReleaseResources): __module__ = "aws_advanced_python_wrapper" @@ -213,6 +215,10 @@ def release_resources(self): if isinstance(self._plugin_service, CanReleaseResources): self._plugin_service.release_resources() + # For testing purposes only + def _unwrap(self, unwrap_class: Type[UnwrapType]) -> Optional[UnwrapType]: + return self._plugin_manager._unwrap(unwrap_class) + def __del__(self): self.release_resources() diff --git a/poetry.lock b/poetry.lock index ee9431b8..d5f3220c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. 
[[package]] name = "astor" @@ -124,6 +124,24 @@ urllib3 = [ [package.extras] crt = ["awscrt (==0.23.8)"] +[[package]] +name = "botocore-stubs" +version = "1.38.30" +description = "Type annotations and code completion for botocore" +optional = false +python-versions = ">=3.8" +groups = ["test"] +files = [ + {file = "botocore_stubs-1.38.30-py3-none-any.whl", hash = "sha256:2efb8bdf36504aff596c670d875d8f7dd15205277c15c4cea54afdba8200c266"}, + {file = "botocore_stubs-1.38.30.tar.gz", hash = "sha256:291d7bf39a316c00a8a55b7255489b02c0cea1a343482e7784e8d1e235bae995"}, +] + +[package.dependencies] +types-awscrt = "*" + +[package.extras] +botocore = ["botocore"] + [[package]] name = "certifi" version = "2024.8.30" @@ -1588,6 +1606,21 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +groups = ["test"] +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + [[package]] name = "toml" version = "0.10.2" @@ -1641,6 +1674,459 @@ files = [ {file = "types_aws_xray_sdk-2.14.0.20240606-py3-none-any.whl", hash = "sha256:c238ad639bb50896f1326c12bcc36b7832b5bc7c4b5e2b19a7efcd89d7d28b94"}, ] +[[package]] +name = "types-awscrt" +version = "0.27.2" +description = "Type annotations and code completion for awscrt" +optional = false +python-versions = ">=3.8" +groups = ["test"] +files = [ + {file = "types_awscrt-0.27.2-py3-none-any.whl", hash = "sha256:49a045f25bbd5ad2865f314512afced933aed35ddbafc252e2268efa8a787e4e"}, + {file = "types_awscrt-0.27.2.tar.gz", hash = "sha256:acd04f57119eb15626ab0ba9157fc24672421de56e7bd7b9f61681fedee44e91"}, +] + +[[package]] +name = "types-boto3" +version = "1.38.40" +description = "Type annotations for boto3 1.38.40 generated with mypy-boto3-builder 8.11.0" +optional = false +python-versions = ">=3.8" +groups = ["test"] +files = [ + {file = "types_boto3-1.38.40-py3-none-any.whl", hash = "sha256:83c8eef33297debdd7994003524977e9b2947aab331cfdd18f81260981a1681a"}, + {file = "types_boto3-1.38.40.tar.gz", hash = "sha256:10e29da59c9d1c24830f1871ee1abdc3e97e5248698e4e6b69737f617c276857"}, +] + +[package.dependencies] +botocore-stubs = "*" +types-s3transfer = "*" +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + +[package.extras] +accessanalyzer = ["types-boto3-accessanalyzer (>=1.38.0,<1.39.0)"] +account = ["types-boto3-account (>=1.38.0,<1.39.0)"] +acm = ["types-boto3-acm (>=1.38.0,<1.39.0)"] +acm-pca = ["types-boto3-acm-pca (>=1.38.0,<1.39.0)"] +aiops = ["types-boto3-aiops (>=1.38.0,<1.39.0)"] +all = ["types-boto3-accessanalyzer (>=1.38.0,<1.39.0)", "types-boto3-account (>=1.38.0,<1.39.0)", "types-boto3-acm (>=1.38.0,<1.39.0)", "types-boto3-acm-pca (>=1.38.0,<1.39.0)", "types-boto3-aiops (>=1.38.0,<1.39.0)", "types-boto3-amp (>=1.38.0,<1.39.0)", "types-boto3-amplify (>=1.38.0,<1.39.0)", "types-boto3-amplifybackend (>=1.38.0,<1.39.0)", "types-boto3-amplifyuibuilder (>=1.38.0,<1.39.0)", "types-boto3-apigateway (>=1.38.0,<1.39.0)", "types-boto3-apigatewaymanagementapi (>=1.38.0,<1.39.0)", "types-boto3-apigatewayv2 (>=1.38.0,<1.39.0)", "types-boto3-appconfig (>=1.38.0,<1.39.0)", "types-boto3-appconfigdata 
(>=1.38.0,<1.39.0)", "types-boto3-appfabric (>=1.38.0,<1.39.0)", "types-boto3-appflow (>=1.38.0,<1.39.0)", "types-boto3-appintegrations (>=1.38.0,<1.39.0)", "types-boto3-application-autoscaling (>=1.38.0,<1.39.0)", "types-boto3-application-insights (>=1.38.0,<1.39.0)", "types-boto3-application-signals (>=1.38.0,<1.39.0)", "types-boto3-applicationcostprofiler (>=1.38.0,<1.39.0)", "types-boto3-appmesh (>=1.38.0,<1.39.0)", "types-boto3-apprunner (>=1.38.0,<1.39.0)", "types-boto3-appstream (>=1.38.0,<1.39.0)", "types-boto3-appsync (>=1.38.0,<1.39.0)", "types-boto3-apptest (>=1.38.0,<1.39.0)", "types-boto3-arc-zonal-shift (>=1.38.0,<1.39.0)", "types-boto3-artifact (>=1.38.0,<1.39.0)", "types-boto3-athena (>=1.38.0,<1.39.0)", "types-boto3-auditmanager (>=1.38.0,<1.39.0)", "types-boto3-autoscaling (>=1.38.0,<1.39.0)", "types-boto3-autoscaling-plans (>=1.38.0,<1.39.0)", "types-boto3-b2bi (>=1.38.0,<1.39.0)", "types-boto3-backup (>=1.38.0,<1.39.0)", "types-boto3-backup-gateway (>=1.38.0,<1.39.0)", "types-boto3-backupsearch (>=1.38.0,<1.39.0)", "types-boto3-batch (>=1.38.0,<1.39.0)", "types-boto3-bcm-data-exports (>=1.38.0,<1.39.0)", "types-boto3-bcm-pricing-calculator (>=1.38.0,<1.39.0)", "types-boto3-bedrock (>=1.38.0,<1.39.0)", "types-boto3-bedrock-agent (>=1.38.0,<1.39.0)", "types-boto3-bedrock-agent-runtime (>=1.38.0,<1.39.0)", "types-boto3-bedrock-data-automation (>=1.38.0,<1.39.0)", "types-boto3-bedrock-data-automation-runtime (>=1.38.0,<1.39.0)", "types-boto3-bedrock-runtime (>=1.38.0,<1.39.0)", "types-boto3-billing (>=1.38.0,<1.39.0)", "types-boto3-billingconductor (>=1.38.0,<1.39.0)", "types-boto3-braket (>=1.38.0,<1.39.0)", "types-boto3-budgets (>=1.38.0,<1.39.0)", "types-boto3-ce (>=1.38.0,<1.39.0)", "types-boto3-chatbot (>=1.38.0,<1.39.0)", "types-boto3-chime (>=1.38.0,<1.39.0)", "types-boto3-chime-sdk-identity (>=1.38.0,<1.39.0)", "types-boto3-chime-sdk-media-pipelines (>=1.38.0,<1.39.0)", "types-boto3-chime-sdk-meetings (>=1.38.0,<1.39.0)", "types-boto3-chime-sdk-messaging (>=1.38.0,<1.39.0)", "types-boto3-chime-sdk-voice (>=1.38.0,<1.39.0)", "types-boto3-cleanrooms (>=1.38.0,<1.39.0)", "types-boto3-cleanroomsml (>=1.38.0,<1.39.0)", "types-boto3-cloud9 (>=1.38.0,<1.39.0)", "types-boto3-cloudcontrol (>=1.38.0,<1.39.0)", "types-boto3-clouddirectory (>=1.38.0,<1.39.0)", "types-boto3-cloudformation (>=1.38.0,<1.39.0)", "types-boto3-cloudfront (>=1.38.0,<1.39.0)", "types-boto3-cloudfront-keyvaluestore (>=1.38.0,<1.39.0)", "types-boto3-cloudhsm (>=1.38.0,<1.39.0)", "types-boto3-cloudhsmv2 (>=1.38.0,<1.39.0)", "types-boto3-cloudsearch (>=1.38.0,<1.39.0)", "types-boto3-cloudsearchdomain (>=1.38.0,<1.39.0)", "types-boto3-cloudtrail (>=1.38.0,<1.39.0)", "types-boto3-cloudtrail-data (>=1.38.0,<1.39.0)", "types-boto3-cloudwatch (>=1.38.0,<1.39.0)", "types-boto3-codeartifact (>=1.38.0,<1.39.0)", "types-boto3-codebuild (>=1.38.0,<1.39.0)", "types-boto3-codecatalyst (>=1.38.0,<1.39.0)", "types-boto3-codecommit (>=1.38.0,<1.39.0)", "types-boto3-codeconnections (>=1.38.0,<1.39.0)", "types-boto3-codedeploy (>=1.38.0,<1.39.0)", "types-boto3-codeguru-reviewer (>=1.38.0,<1.39.0)", "types-boto3-codeguru-security (>=1.38.0,<1.39.0)", "types-boto3-codeguruprofiler (>=1.38.0,<1.39.0)", "types-boto3-codepipeline (>=1.38.0,<1.39.0)", "types-boto3-codestar-connections (>=1.38.0,<1.39.0)", "types-boto3-codestar-notifications (>=1.38.0,<1.39.0)", "types-boto3-cognito-identity (>=1.38.0,<1.39.0)", "types-boto3-cognito-idp (>=1.38.0,<1.39.0)", "types-boto3-cognito-sync (>=1.38.0,<1.39.0)", "types-boto3-comprehend 
(>=1.38.0,<1.39.0)", "types-boto3-comprehendmedical (>=1.38.0,<1.39.0)", "types-boto3-compute-optimizer (>=1.38.0,<1.39.0)", "types-boto3-config (>=1.38.0,<1.39.0)", "types-boto3-connect (>=1.38.0,<1.39.0)", "types-boto3-connect-contact-lens (>=1.38.0,<1.39.0)", "types-boto3-connectcampaigns (>=1.38.0,<1.39.0)", "types-boto3-connectcampaignsv2 (>=1.38.0,<1.39.0)", "types-boto3-connectcases (>=1.38.0,<1.39.0)", "types-boto3-connectparticipant (>=1.38.0,<1.39.0)", "types-boto3-controlcatalog (>=1.38.0,<1.39.0)", "types-boto3-controltower (>=1.38.0,<1.39.0)", "types-boto3-cost-optimization-hub (>=1.38.0,<1.39.0)", "types-boto3-cur (>=1.38.0,<1.39.0)", "types-boto3-customer-profiles (>=1.38.0,<1.39.0)", "types-boto3-databrew (>=1.38.0,<1.39.0)", "types-boto3-dataexchange (>=1.38.0,<1.39.0)", "types-boto3-datapipeline (>=1.38.0,<1.39.0)", "types-boto3-datasync (>=1.38.0,<1.39.0)", "types-boto3-datazone (>=1.38.0,<1.39.0)", "types-boto3-dax (>=1.38.0,<1.39.0)", "types-boto3-deadline (>=1.38.0,<1.39.0)", "types-boto3-detective (>=1.38.0,<1.39.0)", "types-boto3-devicefarm (>=1.38.0,<1.39.0)", "types-boto3-devops-guru (>=1.38.0,<1.39.0)", "types-boto3-directconnect (>=1.38.0,<1.39.0)", "types-boto3-discovery (>=1.38.0,<1.39.0)", "types-boto3-dlm (>=1.38.0,<1.39.0)", "types-boto3-dms (>=1.38.0,<1.39.0)", "types-boto3-docdb (>=1.38.0,<1.39.0)", "types-boto3-docdb-elastic (>=1.38.0,<1.39.0)", "types-boto3-drs (>=1.38.0,<1.39.0)", "types-boto3-ds (>=1.38.0,<1.39.0)", "types-boto3-ds-data (>=1.38.0,<1.39.0)", "types-boto3-dsql (>=1.38.0,<1.39.0)", "types-boto3-dynamodb (>=1.38.0,<1.39.0)", "types-boto3-dynamodbstreams (>=1.38.0,<1.39.0)", "types-boto3-ebs (>=1.38.0,<1.39.0)", "types-boto3-ec2 (>=1.38.0,<1.39.0)", "types-boto3-ec2-instance-connect (>=1.38.0,<1.39.0)", "types-boto3-ecr (>=1.38.0,<1.39.0)", "types-boto3-ecr-public (>=1.38.0,<1.39.0)", "types-boto3-ecs (>=1.38.0,<1.39.0)", "types-boto3-efs (>=1.38.0,<1.39.0)", "types-boto3-eks (>=1.38.0,<1.39.0)", "types-boto3-eks-auth (>=1.38.0,<1.39.0)", "types-boto3-elasticache (>=1.38.0,<1.39.0)", "types-boto3-elasticbeanstalk (>=1.38.0,<1.39.0)", "types-boto3-elastictranscoder (>=1.38.0,<1.39.0)", "types-boto3-elb (>=1.38.0,<1.39.0)", "types-boto3-elbv2 (>=1.38.0,<1.39.0)", "types-boto3-emr (>=1.38.0,<1.39.0)", "types-boto3-emr-containers (>=1.38.0,<1.39.0)", "types-boto3-emr-serverless (>=1.38.0,<1.39.0)", "types-boto3-entityresolution (>=1.38.0,<1.39.0)", "types-boto3-es (>=1.38.0,<1.39.0)", "types-boto3-events (>=1.38.0,<1.39.0)", "types-boto3-evidently (>=1.38.0,<1.39.0)", "types-boto3-evs (>=1.38.0,<1.39.0)", "types-boto3-finspace (>=1.38.0,<1.39.0)", "types-boto3-finspace-data (>=1.38.0,<1.39.0)", "types-boto3-firehose (>=1.38.0,<1.39.0)", "types-boto3-fis (>=1.38.0,<1.39.0)", "types-boto3-fms (>=1.38.0,<1.39.0)", "types-boto3-forecast (>=1.38.0,<1.39.0)", "types-boto3-forecastquery (>=1.38.0,<1.39.0)", "types-boto3-frauddetector (>=1.38.0,<1.39.0)", "types-boto3-freetier (>=1.38.0,<1.39.0)", "types-boto3-fsx (>=1.38.0,<1.39.0)", "types-boto3-gamelift (>=1.38.0,<1.39.0)", "types-boto3-gameliftstreams (>=1.38.0,<1.39.0)", "types-boto3-geo-maps (>=1.38.0,<1.39.0)", "types-boto3-geo-places (>=1.38.0,<1.39.0)", "types-boto3-geo-routes (>=1.38.0,<1.39.0)", "types-boto3-glacier (>=1.38.0,<1.39.0)", "types-boto3-globalaccelerator (>=1.38.0,<1.39.0)", "types-boto3-glue (>=1.38.0,<1.39.0)", "types-boto3-grafana (>=1.38.0,<1.39.0)", "types-boto3-greengrass (>=1.38.0,<1.39.0)", "types-boto3-greengrassv2 (>=1.38.0,<1.39.0)", "types-boto3-groundstation 
(>=1.38.0,<1.39.0)", "types-boto3-guardduty (>=1.38.0,<1.39.0)", "types-boto3-health (>=1.38.0,<1.39.0)", "types-boto3-healthlake (>=1.38.0,<1.39.0)", "types-boto3-iam (>=1.38.0,<1.39.0)", "types-boto3-identitystore (>=1.38.0,<1.39.0)", "types-boto3-imagebuilder (>=1.38.0,<1.39.0)", "types-boto3-importexport (>=1.38.0,<1.39.0)", "types-boto3-inspector (>=1.38.0,<1.39.0)", "types-boto3-inspector-scan (>=1.38.0,<1.39.0)", "types-boto3-inspector2 (>=1.38.0,<1.39.0)", "types-boto3-internetmonitor (>=1.38.0,<1.39.0)", "types-boto3-invoicing (>=1.38.0,<1.39.0)", "types-boto3-iot (>=1.38.0,<1.39.0)", "types-boto3-iot-data (>=1.38.0,<1.39.0)", "types-boto3-iot-jobs-data (>=1.38.0,<1.39.0)", "types-boto3-iot-managed-integrations (>=1.38.0,<1.39.0)", "types-boto3-iotanalytics (>=1.38.0,<1.39.0)", "types-boto3-iotdeviceadvisor (>=1.38.0,<1.39.0)", "types-boto3-iotevents (>=1.38.0,<1.39.0)", "types-boto3-iotevents-data (>=1.38.0,<1.39.0)", "types-boto3-iotfleethub (>=1.38.0,<1.39.0)", "types-boto3-iotfleetwise (>=1.38.0,<1.39.0)", "types-boto3-iotsecuretunneling (>=1.38.0,<1.39.0)", "types-boto3-iotsitewise (>=1.38.0,<1.39.0)", "types-boto3-iotthingsgraph (>=1.38.0,<1.39.0)", "types-boto3-iottwinmaker (>=1.38.0,<1.39.0)", "types-boto3-iotwireless (>=1.38.0,<1.39.0)", "types-boto3-ivs (>=1.38.0,<1.39.0)", "types-boto3-ivs-realtime (>=1.38.0,<1.39.0)", "types-boto3-ivschat (>=1.38.0,<1.39.0)", "types-boto3-kafka (>=1.38.0,<1.39.0)", "types-boto3-kafkaconnect (>=1.38.0,<1.39.0)", "types-boto3-kendra (>=1.38.0,<1.39.0)", "types-boto3-kendra-ranking (>=1.38.0,<1.39.0)", "types-boto3-keyspaces (>=1.38.0,<1.39.0)", "types-boto3-kinesis (>=1.38.0,<1.39.0)", "types-boto3-kinesis-video-archived-media (>=1.38.0,<1.39.0)", "types-boto3-kinesis-video-media (>=1.38.0,<1.39.0)", "types-boto3-kinesis-video-signaling (>=1.38.0,<1.39.0)", "types-boto3-kinesis-video-webrtc-storage (>=1.38.0,<1.39.0)", "types-boto3-kinesisanalytics (>=1.38.0,<1.39.0)", "types-boto3-kinesisanalyticsv2 (>=1.38.0,<1.39.0)", "types-boto3-kinesisvideo (>=1.38.0,<1.39.0)", "types-boto3-kms (>=1.38.0,<1.39.0)", "types-boto3-lakeformation (>=1.38.0,<1.39.0)", "types-boto3-lambda (>=1.38.0,<1.39.0)", "types-boto3-launch-wizard (>=1.38.0,<1.39.0)", "types-boto3-lex-models (>=1.38.0,<1.39.0)", "types-boto3-lex-runtime (>=1.38.0,<1.39.0)", "types-boto3-lexv2-models (>=1.38.0,<1.39.0)", "types-boto3-lexv2-runtime (>=1.38.0,<1.39.0)", "types-boto3-license-manager (>=1.38.0,<1.39.0)", "types-boto3-license-manager-linux-subscriptions (>=1.38.0,<1.39.0)", "types-boto3-license-manager-user-subscriptions (>=1.38.0,<1.39.0)", "types-boto3-lightsail (>=1.38.0,<1.39.0)", "types-boto3-location (>=1.38.0,<1.39.0)", "types-boto3-logs (>=1.38.0,<1.39.0)", "types-boto3-lookoutequipment (>=1.38.0,<1.39.0)", "types-boto3-lookoutmetrics (>=1.38.0,<1.39.0)", "types-boto3-lookoutvision (>=1.38.0,<1.39.0)", "types-boto3-m2 (>=1.38.0,<1.39.0)", "types-boto3-machinelearning (>=1.38.0,<1.39.0)", "types-boto3-macie2 (>=1.38.0,<1.39.0)", "types-boto3-mailmanager (>=1.38.0,<1.39.0)", "types-boto3-managedblockchain (>=1.38.0,<1.39.0)", "types-boto3-managedblockchain-query (>=1.38.0,<1.39.0)", "types-boto3-marketplace-agreement (>=1.38.0,<1.39.0)", "types-boto3-marketplace-catalog (>=1.38.0,<1.39.0)", "types-boto3-marketplace-deployment (>=1.38.0,<1.39.0)", "types-boto3-marketplace-entitlement (>=1.38.0,<1.39.0)", "types-boto3-marketplace-reporting (>=1.38.0,<1.39.0)", "types-boto3-marketplacecommerceanalytics (>=1.38.0,<1.39.0)", "types-boto3-mediaconnect (>=1.38.0,<1.39.0)", 
"types-boto3-mediaconvert (>=1.38.0,<1.39.0)", "types-boto3-medialive (>=1.38.0,<1.39.0)", "types-boto3-mediapackage (>=1.38.0,<1.39.0)", "types-boto3-mediapackage-vod (>=1.38.0,<1.39.0)", "types-boto3-mediapackagev2 (>=1.38.0,<1.39.0)", "types-boto3-mediastore (>=1.38.0,<1.39.0)", "types-boto3-mediastore-data (>=1.38.0,<1.39.0)", "types-boto3-mediatailor (>=1.38.0,<1.39.0)", "types-boto3-medical-imaging (>=1.38.0,<1.39.0)", "types-boto3-memorydb (>=1.38.0,<1.39.0)", "types-boto3-meteringmarketplace (>=1.38.0,<1.39.0)", "types-boto3-mgh (>=1.38.0,<1.39.0)", "types-boto3-mgn (>=1.38.0,<1.39.0)", "types-boto3-migration-hub-refactor-spaces (>=1.38.0,<1.39.0)", "types-boto3-migrationhub-config (>=1.38.0,<1.39.0)", "types-boto3-migrationhuborchestrator (>=1.38.0,<1.39.0)", "types-boto3-migrationhubstrategy (>=1.38.0,<1.39.0)", "types-boto3-mpa (>=1.38.0,<1.39.0)", "types-boto3-mq (>=1.38.0,<1.39.0)", "types-boto3-mturk (>=1.38.0,<1.39.0)", "types-boto3-mwaa (>=1.38.0,<1.39.0)", "types-boto3-neptune (>=1.38.0,<1.39.0)", "types-boto3-neptune-graph (>=1.38.0,<1.39.0)", "types-boto3-neptunedata (>=1.38.0,<1.39.0)", "types-boto3-network-firewall (>=1.38.0,<1.39.0)", "types-boto3-networkflowmonitor (>=1.38.0,<1.39.0)", "types-boto3-networkmanager (>=1.38.0,<1.39.0)", "types-boto3-networkmonitor (>=1.38.0,<1.39.0)", "types-boto3-notifications (>=1.38.0,<1.39.0)", "types-boto3-notificationscontacts (>=1.38.0,<1.39.0)", "types-boto3-oam (>=1.38.0,<1.39.0)", "types-boto3-observabilityadmin (>=1.38.0,<1.39.0)", "types-boto3-omics (>=1.38.0,<1.39.0)", "types-boto3-opensearch (>=1.38.0,<1.39.0)", "types-boto3-opensearchserverless (>=1.38.0,<1.39.0)", "types-boto3-opsworks (>=1.38.0,<1.39.0)", "types-boto3-opsworkscm (>=1.38.0,<1.39.0)", "types-boto3-organizations (>=1.38.0,<1.39.0)", "types-boto3-osis (>=1.38.0,<1.39.0)", "types-boto3-outposts (>=1.38.0,<1.39.0)", "types-boto3-panorama (>=1.38.0,<1.39.0)", "types-boto3-partnercentral-selling (>=1.38.0,<1.39.0)", "types-boto3-payment-cryptography (>=1.38.0,<1.39.0)", "types-boto3-payment-cryptography-data (>=1.38.0,<1.39.0)", "types-boto3-pca-connector-ad (>=1.38.0,<1.39.0)", "types-boto3-pca-connector-scep (>=1.38.0,<1.39.0)", "types-boto3-pcs (>=1.38.0,<1.39.0)", "types-boto3-personalize (>=1.38.0,<1.39.0)", "types-boto3-personalize-events (>=1.38.0,<1.39.0)", "types-boto3-personalize-runtime (>=1.38.0,<1.39.0)", "types-boto3-pi (>=1.38.0,<1.39.0)", "types-boto3-pinpoint (>=1.38.0,<1.39.0)", "types-boto3-pinpoint-email (>=1.38.0,<1.39.0)", "types-boto3-pinpoint-sms-voice (>=1.38.0,<1.39.0)", "types-boto3-pinpoint-sms-voice-v2 (>=1.38.0,<1.39.0)", "types-boto3-pipes (>=1.38.0,<1.39.0)", "types-boto3-polly (>=1.38.0,<1.39.0)", "types-boto3-pricing (>=1.38.0,<1.39.0)", "types-boto3-proton (>=1.38.0,<1.39.0)", "types-boto3-qapps (>=1.38.0,<1.39.0)", "types-boto3-qbusiness (>=1.38.0,<1.39.0)", "types-boto3-qconnect (>=1.38.0,<1.39.0)", "types-boto3-qldb (>=1.38.0,<1.39.0)", "types-boto3-qldb-session (>=1.38.0,<1.39.0)", "types-boto3-quicksight (>=1.38.0,<1.39.0)", "types-boto3-ram (>=1.38.0,<1.39.0)", "types-boto3-rbin (>=1.38.0,<1.39.0)", "types-boto3-rds (>=1.38.0,<1.39.0)", "types-boto3-rds-data (>=1.38.0,<1.39.0)", "types-boto3-redshift (>=1.38.0,<1.39.0)", "types-boto3-redshift-data (>=1.38.0,<1.39.0)", "types-boto3-redshift-serverless (>=1.38.0,<1.39.0)", "types-boto3-rekognition (>=1.38.0,<1.39.0)", "types-boto3-repostspace (>=1.38.0,<1.39.0)", "types-boto3-resiliencehub (>=1.38.0,<1.39.0)", "types-boto3-resource-explorer-2 (>=1.38.0,<1.39.0)", 
"types-boto3-resource-groups (>=1.38.0,<1.39.0)", "types-boto3-resourcegroupstaggingapi (>=1.38.0,<1.39.0)", "types-boto3-robomaker (>=1.38.0,<1.39.0)", "types-boto3-rolesanywhere (>=1.38.0,<1.39.0)", "types-boto3-route53 (>=1.38.0,<1.39.0)", "types-boto3-route53-recovery-cluster (>=1.38.0,<1.39.0)", "types-boto3-route53-recovery-control-config (>=1.38.0,<1.39.0)", "types-boto3-route53-recovery-readiness (>=1.38.0,<1.39.0)", "types-boto3-route53domains (>=1.38.0,<1.39.0)", "types-boto3-route53profiles (>=1.38.0,<1.39.0)", "types-boto3-route53resolver (>=1.38.0,<1.39.0)", "types-boto3-rum (>=1.38.0,<1.39.0)", "types-boto3-s3 (>=1.38.0,<1.39.0)", "types-boto3-s3control (>=1.38.0,<1.39.0)", "types-boto3-s3outposts (>=1.38.0,<1.39.0)", "types-boto3-s3tables (>=1.38.0,<1.39.0)", "types-boto3-sagemaker (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-a2i-runtime (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-edge (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-featurestore-runtime (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-geospatial (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-metrics (>=1.38.0,<1.39.0)", "types-boto3-sagemaker-runtime (>=1.38.0,<1.39.0)", "types-boto3-savingsplans (>=1.38.0,<1.39.0)", "types-boto3-scheduler (>=1.38.0,<1.39.0)", "types-boto3-schemas (>=1.38.0,<1.39.0)", "types-boto3-sdb (>=1.38.0,<1.39.0)", "types-boto3-secretsmanager (>=1.38.0,<1.39.0)", "types-boto3-security-ir (>=1.38.0,<1.39.0)", "types-boto3-securityhub (>=1.38.0,<1.39.0)", "types-boto3-securitylake (>=1.38.0,<1.39.0)", "types-boto3-serverlessrepo (>=1.38.0,<1.39.0)", "types-boto3-service-quotas (>=1.38.0,<1.39.0)", "types-boto3-servicecatalog (>=1.38.0,<1.39.0)", "types-boto3-servicecatalog-appregistry (>=1.38.0,<1.39.0)", "types-boto3-servicediscovery (>=1.38.0,<1.39.0)", "types-boto3-ses (>=1.38.0,<1.39.0)", "types-boto3-sesv2 (>=1.38.0,<1.39.0)", "types-boto3-shield (>=1.38.0,<1.39.0)", "types-boto3-signer (>=1.38.0,<1.39.0)", "types-boto3-simspaceweaver (>=1.38.0,<1.39.0)", "types-boto3-sms (>=1.38.0,<1.39.0)", "types-boto3-snow-device-management (>=1.38.0,<1.39.0)", "types-boto3-snowball (>=1.38.0,<1.39.0)", "types-boto3-sns (>=1.38.0,<1.39.0)", "types-boto3-socialmessaging (>=1.38.0,<1.39.0)", "types-boto3-sqs (>=1.38.0,<1.39.0)", "types-boto3-ssm (>=1.38.0,<1.39.0)", "types-boto3-ssm-contacts (>=1.38.0,<1.39.0)", "types-boto3-ssm-guiconnect (>=1.38.0,<1.39.0)", "types-boto3-ssm-incidents (>=1.38.0,<1.39.0)", "types-boto3-ssm-quicksetup (>=1.38.0,<1.39.0)", "types-boto3-ssm-sap (>=1.38.0,<1.39.0)", "types-boto3-sso (>=1.38.0,<1.39.0)", "types-boto3-sso-admin (>=1.38.0,<1.39.0)", "types-boto3-sso-oidc (>=1.38.0,<1.39.0)", "types-boto3-stepfunctions (>=1.38.0,<1.39.0)", "types-boto3-storagegateway (>=1.38.0,<1.39.0)", "types-boto3-sts (>=1.38.0,<1.39.0)", "types-boto3-supplychain (>=1.38.0,<1.39.0)", "types-boto3-support (>=1.38.0,<1.39.0)", "types-boto3-support-app (>=1.38.0,<1.39.0)", "types-boto3-swf (>=1.38.0,<1.39.0)", "types-boto3-synthetics (>=1.38.0,<1.39.0)", "types-boto3-taxsettings (>=1.38.0,<1.39.0)", "types-boto3-textract (>=1.38.0,<1.39.0)", "types-boto3-timestream-influxdb (>=1.38.0,<1.39.0)", "types-boto3-timestream-query (>=1.38.0,<1.39.0)", "types-boto3-timestream-write (>=1.38.0,<1.39.0)", "types-boto3-tnb (>=1.38.0,<1.39.0)", "types-boto3-transcribe (>=1.38.0,<1.39.0)", "types-boto3-transfer (>=1.38.0,<1.39.0)", "types-boto3-translate (>=1.38.0,<1.39.0)", "types-boto3-trustedadvisor (>=1.38.0,<1.39.0)", "types-boto3-verifiedpermissions (>=1.38.0,<1.39.0)", "types-boto3-voice-id 
(>=1.38.0,<1.39.0)", "types-boto3-vpc-lattice (>=1.38.0,<1.39.0)", "types-boto3-waf (>=1.38.0,<1.39.0)", "types-boto3-waf-regional (>=1.38.0,<1.39.0)", "types-boto3-wafv2 (>=1.38.0,<1.39.0)", "types-boto3-wellarchitected (>=1.38.0,<1.39.0)", "types-boto3-wisdom (>=1.38.0,<1.39.0)", "types-boto3-workdocs (>=1.38.0,<1.39.0)", "types-boto3-workmail (>=1.38.0,<1.39.0)", "types-boto3-workmailmessageflow (>=1.38.0,<1.39.0)", "types-boto3-workspaces (>=1.38.0,<1.39.0)", "types-boto3-workspaces-thin-client (>=1.38.0,<1.39.0)", "types-boto3-workspaces-web (>=1.38.0,<1.39.0)", "types-boto3-xray (>=1.38.0,<1.39.0)"] +amp = ["types-boto3-amp (>=1.38.0,<1.39.0)"] +amplify = ["types-boto3-amplify (>=1.38.0,<1.39.0)"] +amplifybackend = ["types-boto3-amplifybackend (>=1.38.0,<1.39.0)"] +amplifyuibuilder = ["types-boto3-amplifyuibuilder (>=1.38.0,<1.39.0)"] +apigateway = ["types-boto3-apigateway (>=1.38.0,<1.39.0)"] +apigatewaymanagementapi = ["types-boto3-apigatewaymanagementapi (>=1.38.0,<1.39.0)"] +apigatewayv2 = ["types-boto3-apigatewayv2 (>=1.38.0,<1.39.0)"] +appconfig = ["types-boto3-appconfig (>=1.38.0,<1.39.0)"] +appconfigdata = ["types-boto3-appconfigdata (>=1.38.0,<1.39.0)"] +appfabric = ["types-boto3-appfabric (>=1.38.0,<1.39.0)"] +appflow = ["types-boto3-appflow (>=1.38.0,<1.39.0)"] +appintegrations = ["types-boto3-appintegrations (>=1.38.0,<1.39.0)"] +application-autoscaling = ["types-boto3-application-autoscaling (>=1.38.0,<1.39.0)"] +application-insights = ["types-boto3-application-insights (>=1.38.0,<1.39.0)"] +application-signals = ["types-boto3-application-signals (>=1.38.0,<1.39.0)"] +applicationcostprofiler = ["types-boto3-applicationcostprofiler (>=1.38.0,<1.39.0)"] +appmesh = ["types-boto3-appmesh (>=1.38.0,<1.39.0)"] +apprunner = ["types-boto3-apprunner (>=1.38.0,<1.39.0)"] +appstream = ["types-boto3-appstream (>=1.38.0,<1.39.0)"] +appsync = ["types-boto3-appsync (>=1.38.0,<1.39.0)"] +apptest = ["types-boto3-apptest (>=1.38.0,<1.39.0)"] +arc-zonal-shift = ["types-boto3-arc-zonal-shift (>=1.38.0,<1.39.0)"] +artifact = ["types-boto3-artifact (>=1.38.0,<1.39.0)"] +athena = ["types-boto3-athena (>=1.38.0,<1.39.0)"] +auditmanager = ["types-boto3-auditmanager (>=1.38.0,<1.39.0)"] +autoscaling = ["types-boto3-autoscaling (>=1.38.0,<1.39.0)"] +autoscaling-plans = ["types-boto3-autoscaling-plans (>=1.38.0,<1.39.0)"] +b2bi = ["types-boto3-b2bi (>=1.38.0,<1.39.0)"] +backup = ["types-boto3-backup (>=1.38.0,<1.39.0)"] +backup-gateway = ["types-boto3-backup-gateway (>=1.38.0,<1.39.0)"] +backupsearch = ["types-boto3-backupsearch (>=1.38.0,<1.39.0)"] +batch = ["types-boto3-batch (>=1.38.0,<1.39.0)"] +bcm-data-exports = ["types-boto3-bcm-data-exports (>=1.38.0,<1.39.0)"] +bcm-pricing-calculator = ["types-boto3-bcm-pricing-calculator (>=1.38.0,<1.39.0)"] +bedrock = ["types-boto3-bedrock (>=1.38.0,<1.39.0)"] +bedrock-agent = ["types-boto3-bedrock-agent (>=1.38.0,<1.39.0)"] +bedrock-agent-runtime = ["types-boto3-bedrock-agent-runtime (>=1.38.0,<1.39.0)"] +bedrock-data-automation = ["types-boto3-bedrock-data-automation (>=1.38.0,<1.39.0)"] +bedrock-data-automation-runtime = ["types-boto3-bedrock-data-automation-runtime (>=1.38.0,<1.39.0)"] +bedrock-runtime = ["types-boto3-bedrock-runtime (>=1.38.0,<1.39.0)"] +billing = ["types-boto3-billing (>=1.38.0,<1.39.0)"] +billingconductor = ["types-boto3-billingconductor (>=1.38.0,<1.39.0)"] +boto3 = ["boto3 (==1.38.40)"] +braket = ["types-boto3-braket (>=1.38.0,<1.39.0)"] +budgets = ["types-boto3-budgets (>=1.38.0,<1.39.0)"] +ce = ["types-boto3-ce 
(>=1.38.0,<1.39.0)"] +chatbot = ["types-boto3-chatbot (>=1.38.0,<1.39.0)"] +chime = ["types-boto3-chime (>=1.38.0,<1.39.0)"] +chime-sdk-identity = ["types-boto3-chime-sdk-identity (>=1.38.0,<1.39.0)"] +chime-sdk-media-pipelines = ["types-boto3-chime-sdk-media-pipelines (>=1.38.0,<1.39.0)"] +chime-sdk-meetings = ["types-boto3-chime-sdk-meetings (>=1.38.0,<1.39.0)"] +chime-sdk-messaging = ["types-boto3-chime-sdk-messaging (>=1.38.0,<1.39.0)"] +chime-sdk-voice = ["types-boto3-chime-sdk-voice (>=1.38.0,<1.39.0)"] +cleanrooms = ["types-boto3-cleanrooms (>=1.38.0,<1.39.0)"] +cleanroomsml = ["types-boto3-cleanroomsml (>=1.38.0,<1.39.0)"] +cloud9 = ["types-boto3-cloud9 (>=1.38.0,<1.39.0)"] +cloudcontrol = ["types-boto3-cloudcontrol (>=1.38.0,<1.39.0)"] +clouddirectory = ["types-boto3-clouddirectory (>=1.38.0,<1.39.0)"] +cloudformation = ["types-boto3-cloudformation (>=1.38.0,<1.39.0)"] +cloudfront = ["types-boto3-cloudfront (>=1.38.0,<1.39.0)"] +cloudfront-keyvaluestore = ["types-boto3-cloudfront-keyvaluestore (>=1.38.0,<1.39.0)"] +cloudhsm = ["types-boto3-cloudhsm (>=1.38.0,<1.39.0)"] +cloudhsmv2 = ["types-boto3-cloudhsmv2 (>=1.38.0,<1.39.0)"] +cloudsearch = ["types-boto3-cloudsearch (>=1.38.0,<1.39.0)"] +cloudsearchdomain = ["types-boto3-cloudsearchdomain (>=1.38.0,<1.39.0)"] +cloudtrail = ["types-boto3-cloudtrail (>=1.38.0,<1.39.0)"] +cloudtrail-data = ["types-boto3-cloudtrail-data (>=1.38.0,<1.39.0)"] +cloudwatch = ["types-boto3-cloudwatch (>=1.38.0,<1.39.0)"] +codeartifact = ["types-boto3-codeartifact (>=1.38.0,<1.39.0)"] +codebuild = ["types-boto3-codebuild (>=1.38.0,<1.39.0)"] +codecatalyst = ["types-boto3-codecatalyst (>=1.38.0,<1.39.0)"] +codecommit = ["types-boto3-codecommit (>=1.38.0,<1.39.0)"] +codeconnections = ["types-boto3-codeconnections (>=1.38.0,<1.39.0)"] +codedeploy = ["types-boto3-codedeploy (>=1.38.0,<1.39.0)"] +codeguru-reviewer = ["types-boto3-codeguru-reviewer (>=1.38.0,<1.39.0)"] +codeguru-security = ["types-boto3-codeguru-security (>=1.38.0,<1.39.0)"] +codeguruprofiler = ["types-boto3-codeguruprofiler (>=1.38.0,<1.39.0)"] +codepipeline = ["types-boto3-codepipeline (>=1.38.0,<1.39.0)"] +codestar-connections = ["types-boto3-codestar-connections (>=1.38.0,<1.39.0)"] +codestar-notifications = ["types-boto3-codestar-notifications (>=1.38.0,<1.39.0)"] +cognito-identity = ["types-boto3-cognito-identity (>=1.38.0,<1.39.0)"] +cognito-idp = ["types-boto3-cognito-idp (>=1.38.0,<1.39.0)"] +cognito-sync = ["types-boto3-cognito-sync (>=1.38.0,<1.39.0)"] +comprehend = ["types-boto3-comprehend (>=1.38.0,<1.39.0)"] +comprehendmedical = ["types-boto3-comprehendmedical (>=1.38.0,<1.39.0)"] +compute-optimizer = ["types-boto3-compute-optimizer (>=1.38.0,<1.39.0)"] +config = ["types-boto3-config (>=1.38.0,<1.39.0)"] +connect = ["types-boto3-connect (>=1.38.0,<1.39.0)"] +connect-contact-lens = ["types-boto3-connect-contact-lens (>=1.38.0,<1.39.0)"] +connectcampaigns = ["types-boto3-connectcampaigns (>=1.38.0,<1.39.0)"] +connectcampaignsv2 = ["types-boto3-connectcampaignsv2 (>=1.38.0,<1.39.0)"] +connectcases = ["types-boto3-connectcases (>=1.38.0,<1.39.0)"] +connectparticipant = ["types-boto3-connectparticipant (>=1.38.0,<1.39.0)"] +controlcatalog = ["types-boto3-controlcatalog (>=1.38.0,<1.39.0)"] +controltower = ["types-boto3-controltower (>=1.38.0,<1.39.0)"] +cost-optimization-hub = ["types-boto3-cost-optimization-hub (>=1.38.0,<1.39.0)"] +cur = ["types-boto3-cur (>=1.38.0,<1.39.0)"] +customer-profiles = ["types-boto3-customer-profiles (>=1.38.0,<1.39.0)"] +databrew = ["types-boto3-databrew 
(>=1.38.0,<1.39.0)"] +dataexchange = ["types-boto3-dataexchange (>=1.38.0,<1.39.0)"] +datapipeline = ["types-boto3-datapipeline (>=1.38.0,<1.39.0)"] +datasync = ["types-boto3-datasync (>=1.38.0,<1.39.0)"] +datazone = ["types-boto3-datazone (>=1.38.0,<1.39.0)"] +dax = ["types-boto3-dax (>=1.38.0,<1.39.0)"] +deadline = ["types-boto3-deadline (>=1.38.0,<1.39.0)"] +detective = ["types-boto3-detective (>=1.38.0,<1.39.0)"] +devicefarm = ["types-boto3-devicefarm (>=1.38.0,<1.39.0)"] +devops-guru = ["types-boto3-devops-guru (>=1.38.0,<1.39.0)"] +directconnect = ["types-boto3-directconnect (>=1.38.0,<1.39.0)"] +discovery = ["types-boto3-discovery (>=1.38.0,<1.39.0)"] +dlm = ["types-boto3-dlm (>=1.38.0,<1.39.0)"] +dms = ["types-boto3-dms (>=1.38.0,<1.39.0)"] +docdb = ["types-boto3-docdb (>=1.38.0,<1.39.0)"] +docdb-elastic = ["types-boto3-docdb-elastic (>=1.38.0,<1.39.0)"] +drs = ["types-boto3-drs (>=1.38.0,<1.39.0)"] +ds = ["types-boto3-ds (>=1.38.0,<1.39.0)"] +ds-data = ["types-boto3-ds-data (>=1.38.0,<1.39.0)"] +dsql = ["types-boto3-dsql (>=1.38.0,<1.39.0)"] +dynamodb = ["types-boto3-dynamodb (>=1.38.0,<1.39.0)"] +dynamodbstreams = ["types-boto3-dynamodbstreams (>=1.38.0,<1.39.0)"] +ebs = ["types-boto3-ebs (>=1.38.0,<1.39.0)"] +ec2 = ["types-boto3-ec2 (>=1.38.0,<1.39.0)"] +ec2-instance-connect = ["types-boto3-ec2-instance-connect (>=1.38.0,<1.39.0)"] +ecr = ["types-boto3-ecr (>=1.38.0,<1.39.0)"] +ecr-public = ["types-boto3-ecr-public (>=1.38.0,<1.39.0)"] +ecs = ["types-boto3-ecs (>=1.38.0,<1.39.0)"] +efs = ["types-boto3-efs (>=1.38.0,<1.39.0)"] +eks = ["types-boto3-eks (>=1.38.0,<1.39.0)"] +eks-auth = ["types-boto3-eks-auth (>=1.38.0,<1.39.0)"] +elasticache = ["types-boto3-elasticache (>=1.38.0,<1.39.0)"] +elasticbeanstalk = ["types-boto3-elasticbeanstalk (>=1.38.0,<1.39.0)"] +elastictranscoder = ["types-boto3-elastictranscoder (>=1.38.0,<1.39.0)"] +elb = ["types-boto3-elb (>=1.38.0,<1.39.0)"] +elbv2 = ["types-boto3-elbv2 (>=1.38.0,<1.39.0)"] +emr = ["types-boto3-emr (>=1.38.0,<1.39.0)"] +emr-containers = ["types-boto3-emr-containers (>=1.38.0,<1.39.0)"] +emr-serverless = ["types-boto3-emr-serverless (>=1.38.0,<1.39.0)"] +entityresolution = ["types-boto3-entityresolution (>=1.38.0,<1.39.0)"] +es = ["types-boto3-es (>=1.38.0,<1.39.0)"] +essential = ["types-boto3-cloudformation (>=1.38.0,<1.39.0)", "types-boto3-dynamodb (>=1.38.0,<1.39.0)", "types-boto3-ec2 (>=1.38.0,<1.39.0)", "types-boto3-lambda (>=1.38.0,<1.39.0)", "types-boto3-rds (>=1.38.0,<1.39.0)", "types-boto3-s3 (>=1.38.0,<1.39.0)", "types-boto3-sqs (>=1.38.0,<1.39.0)"] +events = ["types-boto3-events (>=1.38.0,<1.39.0)"] +evidently = ["types-boto3-evidently (>=1.38.0,<1.39.0)"] +evs = ["types-boto3-evs (>=1.38.0,<1.39.0)"] +finspace = ["types-boto3-finspace (>=1.38.0,<1.39.0)"] +finspace-data = ["types-boto3-finspace-data (>=1.38.0,<1.39.0)"] +firehose = ["types-boto3-firehose (>=1.38.0,<1.39.0)"] +fis = ["types-boto3-fis (>=1.38.0,<1.39.0)"] +fms = ["types-boto3-fms (>=1.38.0,<1.39.0)"] +forecast = ["types-boto3-forecast (>=1.38.0,<1.39.0)"] +forecastquery = ["types-boto3-forecastquery (>=1.38.0,<1.39.0)"] +frauddetector = ["types-boto3-frauddetector (>=1.38.0,<1.39.0)"] +freetier = ["types-boto3-freetier (>=1.38.0,<1.39.0)"] +fsx = ["types-boto3-fsx (>=1.38.0,<1.39.0)"] +full = ["types-boto3-full (>=1.38.0,<1.39.0)"] +gamelift = ["types-boto3-gamelift (>=1.38.0,<1.39.0)"] +gameliftstreams = ["types-boto3-gameliftstreams (>=1.38.0,<1.39.0)"] +geo-maps = ["types-boto3-geo-maps (>=1.38.0,<1.39.0)"] +geo-places = ["types-boto3-geo-places 
(>=1.38.0,<1.39.0)"] +geo-routes = ["types-boto3-geo-routes (>=1.38.0,<1.39.0)"] +glacier = ["types-boto3-glacier (>=1.38.0,<1.39.0)"] +globalaccelerator = ["types-boto3-globalaccelerator (>=1.38.0,<1.39.0)"] +glue = ["types-boto3-glue (>=1.38.0,<1.39.0)"] +grafana = ["types-boto3-grafana (>=1.38.0,<1.39.0)"] +greengrass = ["types-boto3-greengrass (>=1.38.0,<1.39.0)"] +greengrassv2 = ["types-boto3-greengrassv2 (>=1.38.0,<1.39.0)"] +groundstation = ["types-boto3-groundstation (>=1.38.0,<1.39.0)"] +guardduty = ["types-boto3-guardduty (>=1.38.0,<1.39.0)"] +health = ["types-boto3-health (>=1.38.0,<1.39.0)"] +healthlake = ["types-boto3-healthlake (>=1.38.0,<1.39.0)"] +iam = ["types-boto3-iam (>=1.38.0,<1.39.0)"] +identitystore = ["types-boto3-identitystore (>=1.38.0,<1.39.0)"] +imagebuilder = ["types-boto3-imagebuilder (>=1.38.0,<1.39.0)"] +importexport = ["types-boto3-importexport (>=1.38.0,<1.39.0)"] +inspector = ["types-boto3-inspector (>=1.38.0,<1.39.0)"] +inspector-scan = ["types-boto3-inspector-scan (>=1.38.0,<1.39.0)"] +inspector2 = ["types-boto3-inspector2 (>=1.38.0,<1.39.0)"] +internetmonitor = ["types-boto3-internetmonitor (>=1.38.0,<1.39.0)"] +invoicing = ["types-boto3-invoicing (>=1.38.0,<1.39.0)"] +iot = ["types-boto3-iot (>=1.38.0,<1.39.0)"] +iot-data = ["types-boto3-iot-data (>=1.38.0,<1.39.0)"] +iot-jobs-data = ["types-boto3-iot-jobs-data (>=1.38.0,<1.39.0)"] +iot-managed-integrations = ["types-boto3-iot-managed-integrations (>=1.38.0,<1.39.0)"] +iotanalytics = ["types-boto3-iotanalytics (>=1.38.0,<1.39.0)"] +iotdeviceadvisor = ["types-boto3-iotdeviceadvisor (>=1.38.0,<1.39.0)"] +iotevents = ["types-boto3-iotevents (>=1.38.0,<1.39.0)"] +iotevents-data = ["types-boto3-iotevents-data (>=1.38.0,<1.39.0)"] +iotfleethub = ["types-boto3-iotfleethub (>=1.38.0,<1.39.0)"] +iotfleetwise = ["types-boto3-iotfleetwise (>=1.38.0,<1.39.0)"] +iotsecuretunneling = ["types-boto3-iotsecuretunneling (>=1.38.0,<1.39.0)"] +iotsitewise = ["types-boto3-iotsitewise (>=1.38.0,<1.39.0)"] +iotthingsgraph = ["types-boto3-iotthingsgraph (>=1.38.0,<1.39.0)"] +iottwinmaker = ["types-boto3-iottwinmaker (>=1.38.0,<1.39.0)"] +iotwireless = ["types-boto3-iotwireless (>=1.38.0,<1.39.0)"] +ivs = ["types-boto3-ivs (>=1.38.0,<1.39.0)"] +ivs-realtime = ["types-boto3-ivs-realtime (>=1.38.0,<1.39.0)"] +ivschat = ["types-boto3-ivschat (>=1.38.0,<1.39.0)"] +kafka = ["types-boto3-kafka (>=1.38.0,<1.39.0)"] +kafkaconnect = ["types-boto3-kafkaconnect (>=1.38.0,<1.39.0)"] +kendra = ["types-boto3-kendra (>=1.38.0,<1.39.0)"] +kendra-ranking = ["types-boto3-kendra-ranking (>=1.38.0,<1.39.0)"] +keyspaces = ["types-boto3-keyspaces (>=1.38.0,<1.39.0)"] +kinesis = ["types-boto3-kinesis (>=1.38.0,<1.39.0)"] +kinesis-video-archived-media = ["types-boto3-kinesis-video-archived-media (>=1.38.0,<1.39.0)"] +kinesis-video-media = ["types-boto3-kinesis-video-media (>=1.38.0,<1.39.0)"] +kinesis-video-signaling = ["types-boto3-kinesis-video-signaling (>=1.38.0,<1.39.0)"] +kinesis-video-webrtc-storage = ["types-boto3-kinesis-video-webrtc-storage (>=1.38.0,<1.39.0)"] +kinesisanalytics = ["types-boto3-kinesisanalytics (>=1.38.0,<1.39.0)"] +kinesisanalyticsv2 = ["types-boto3-kinesisanalyticsv2 (>=1.38.0,<1.39.0)"] +kinesisvideo = ["types-boto3-kinesisvideo (>=1.38.0,<1.39.0)"] +kms = ["types-boto3-kms (>=1.38.0,<1.39.0)"] +lakeformation = ["types-boto3-lakeformation (>=1.38.0,<1.39.0)"] +lambda = ["types-boto3-lambda (>=1.38.0,<1.39.0)"] +launch-wizard = ["types-boto3-launch-wizard (>=1.38.0,<1.39.0)"] +lex-models = ["types-boto3-lex-models 
(>=1.38.0,<1.39.0)"] +lex-runtime = ["types-boto3-lex-runtime (>=1.38.0,<1.39.0)"] +lexv2-models = ["types-boto3-lexv2-models (>=1.38.0,<1.39.0)"] +lexv2-runtime = ["types-boto3-lexv2-runtime (>=1.38.0,<1.39.0)"] +license-manager = ["types-boto3-license-manager (>=1.38.0,<1.39.0)"] +license-manager-linux-subscriptions = ["types-boto3-license-manager-linux-subscriptions (>=1.38.0,<1.39.0)"] +license-manager-user-subscriptions = ["types-boto3-license-manager-user-subscriptions (>=1.38.0,<1.39.0)"] +lightsail = ["types-boto3-lightsail (>=1.38.0,<1.39.0)"] +location = ["types-boto3-location (>=1.38.0,<1.39.0)"] +logs = ["types-boto3-logs (>=1.38.0,<1.39.0)"] +lookoutequipment = ["types-boto3-lookoutequipment (>=1.38.0,<1.39.0)"] +lookoutmetrics = ["types-boto3-lookoutmetrics (>=1.38.0,<1.39.0)"] +lookoutvision = ["types-boto3-lookoutvision (>=1.38.0,<1.39.0)"] +m2 = ["types-boto3-m2 (>=1.38.0,<1.39.0)"] +machinelearning = ["types-boto3-machinelearning (>=1.38.0,<1.39.0)"] +macie2 = ["types-boto3-macie2 (>=1.38.0,<1.39.0)"] +mailmanager = ["types-boto3-mailmanager (>=1.38.0,<1.39.0)"] +managedblockchain = ["types-boto3-managedblockchain (>=1.38.0,<1.39.0)"] +managedblockchain-query = ["types-boto3-managedblockchain-query (>=1.38.0,<1.39.0)"] +marketplace-agreement = ["types-boto3-marketplace-agreement (>=1.38.0,<1.39.0)"] +marketplace-catalog = ["types-boto3-marketplace-catalog (>=1.38.0,<1.39.0)"] +marketplace-deployment = ["types-boto3-marketplace-deployment (>=1.38.0,<1.39.0)"] +marketplace-entitlement = ["types-boto3-marketplace-entitlement (>=1.38.0,<1.39.0)"] +marketplace-reporting = ["types-boto3-marketplace-reporting (>=1.38.0,<1.39.0)"] +marketplacecommerceanalytics = ["types-boto3-marketplacecommerceanalytics (>=1.38.0,<1.39.0)"] +mediaconnect = ["types-boto3-mediaconnect (>=1.38.0,<1.39.0)"] +mediaconvert = ["types-boto3-mediaconvert (>=1.38.0,<1.39.0)"] +medialive = ["types-boto3-medialive (>=1.38.0,<1.39.0)"] +mediapackage = ["types-boto3-mediapackage (>=1.38.0,<1.39.0)"] +mediapackage-vod = ["types-boto3-mediapackage-vod (>=1.38.0,<1.39.0)"] +mediapackagev2 = ["types-boto3-mediapackagev2 (>=1.38.0,<1.39.0)"] +mediastore = ["types-boto3-mediastore (>=1.38.0,<1.39.0)"] +mediastore-data = ["types-boto3-mediastore-data (>=1.38.0,<1.39.0)"] +mediatailor = ["types-boto3-mediatailor (>=1.38.0,<1.39.0)"] +medical-imaging = ["types-boto3-medical-imaging (>=1.38.0,<1.39.0)"] +memorydb = ["types-boto3-memorydb (>=1.38.0,<1.39.0)"] +meteringmarketplace = ["types-boto3-meteringmarketplace (>=1.38.0,<1.39.0)"] +mgh = ["types-boto3-mgh (>=1.38.0,<1.39.0)"] +mgn = ["types-boto3-mgn (>=1.38.0,<1.39.0)"] +migration-hub-refactor-spaces = ["types-boto3-migration-hub-refactor-spaces (>=1.38.0,<1.39.0)"] +migrationhub-config = ["types-boto3-migrationhub-config (>=1.38.0,<1.39.0)"] +migrationhuborchestrator = ["types-boto3-migrationhuborchestrator (>=1.38.0,<1.39.0)"] +migrationhubstrategy = ["types-boto3-migrationhubstrategy (>=1.38.0,<1.39.0)"] +mpa = ["types-boto3-mpa (>=1.38.0,<1.39.0)"] +mq = ["types-boto3-mq (>=1.38.0,<1.39.0)"] +mturk = ["types-boto3-mturk (>=1.38.0,<1.39.0)"] +mwaa = ["types-boto3-mwaa (>=1.38.0,<1.39.0)"] +neptune = ["types-boto3-neptune (>=1.38.0,<1.39.0)"] +neptune-graph = ["types-boto3-neptune-graph (>=1.38.0,<1.39.0)"] +neptunedata = ["types-boto3-neptunedata (>=1.38.0,<1.39.0)"] +network-firewall = ["types-boto3-network-firewall (>=1.38.0,<1.39.0)"] +networkflowmonitor = ["types-boto3-networkflowmonitor (>=1.38.0,<1.39.0)"] +networkmanager = ["types-boto3-networkmanager 
(>=1.38.0,<1.39.0)"] +networkmonitor = ["types-boto3-networkmonitor (>=1.38.0,<1.39.0)"] +notifications = ["types-boto3-notifications (>=1.38.0,<1.39.0)"] +notificationscontacts = ["types-boto3-notificationscontacts (>=1.38.0,<1.39.0)"] +oam = ["types-boto3-oam (>=1.38.0,<1.39.0)"] +observabilityadmin = ["types-boto3-observabilityadmin (>=1.38.0,<1.39.0)"] +omics = ["types-boto3-omics (>=1.38.0,<1.39.0)"] +opensearch = ["types-boto3-opensearch (>=1.38.0,<1.39.0)"] +opensearchserverless = ["types-boto3-opensearchserverless (>=1.38.0,<1.39.0)"] +opsworks = ["types-boto3-opsworks (>=1.38.0,<1.39.0)"] +opsworkscm = ["types-boto3-opsworkscm (>=1.38.0,<1.39.0)"] +organizations = ["types-boto3-organizations (>=1.38.0,<1.39.0)"] +osis = ["types-boto3-osis (>=1.38.0,<1.39.0)"] +outposts = ["types-boto3-outposts (>=1.38.0,<1.39.0)"] +panorama = ["types-boto3-panorama (>=1.38.0,<1.39.0)"] +partnercentral-selling = ["types-boto3-partnercentral-selling (>=1.38.0,<1.39.0)"] +payment-cryptography = ["types-boto3-payment-cryptography (>=1.38.0,<1.39.0)"] +payment-cryptography-data = ["types-boto3-payment-cryptography-data (>=1.38.0,<1.39.0)"] +pca-connector-ad = ["types-boto3-pca-connector-ad (>=1.38.0,<1.39.0)"] +pca-connector-scep = ["types-boto3-pca-connector-scep (>=1.38.0,<1.39.0)"] +pcs = ["types-boto3-pcs (>=1.38.0,<1.39.0)"] +personalize = ["types-boto3-personalize (>=1.38.0,<1.39.0)"] +personalize-events = ["types-boto3-personalize-events (>=1.38.0,<1.39.0)"] +personalize-runtime = ["types-boto3-personalize-runtime (>=1.38.0,<1.39.0)"] +pi = ["types-boto3-pi (>=1.38.0,<1.39.0)"] +pinpoint = ["types-boto3-pinpoint (>=1.38.0,<1.39.0)"] +pinpoint-email = ["types-boto3-pinpoint-email (>=1.38.0,<1.39.0)"] +pinpoint-sms-voice = ["types-boto3-pinpoint-sms-voice (>=1.38.0,<1.39.0)"] +pinpoint-sms-voice-v2 = ["types-boto3-pinpoint-sms-voice-v2 (>=1.38.0,<1.39.0)"] +pipes = ["types-boto3-pipes (>=1.38.0,<1.39.0)"] +polly = ["types-boto3-polly (>=1.38.0,<1.39.0)"] +pricing = ["types-boto3-pricing (>=1.38.0,<1.39.0)"] +proton = ["types-boto3-proton (>=1.38.0,<1.39.0)"] +qapps = ["types-boto3-qapps (>=1.38.0,<1.39.0)"] +qbusiness = ["types-boto3-qbusiness (>=1.38.0,<1.39.0)"] +qconnect = ["types-boto3-qconnect (>=1.38.0,<1.39.0)"] +qldb = ["types-boto3-qldb (>=1.38.0,<1.39.0)"] +qldb-session = ["types-boto3-qldb-session (>=1.38.0,<1.39.0)"] +quicksight = ["types-boto3-quicksight (>=1.38.0,<1.39.0)"] +ram = ["types-boto3-ram (>=1.38.0,<1.39.0)"] +rbin = ["types-boto3-rbin (>=1.38.0,<1.39.0)"] +rds = ["types-boto3-rds (>=1.38.0,<1.39.0)"] +rds-data = ["types-boto3-rds-data (>=1.38.0,<1.39.0)"] +redshift = ["types-boto3-redshift (>=1.38.0,<1.39.0)"] +redshift-data = ["types-boto3-redshift-data (>=1.38.0,<1.39.0)"] +redshift-serverless = ["types-boto3-redshift-serverless (>=1.38.0,<1.39.0)"] +rekognition = ["types-boto3-rekognition (>=1.38.0,<1.39.0)"] +repostspace = ["types-boto3-repostspace (>=1.38.0,<1.39.0)"] +resiliencehub = ["types-boto3-resiliencehub (>=1.38.0,<1.39.0)"] +resource-explorer-2 = ["types-boto3-resource-explorer-2 (>=1.38.0,<1.39.0)"] +resource-groups = ["types-boto3-resource-groups (>=1.38.0,<1.39.0)"] +resourcegroupstaggingapi = ["types-boto3-resourcegroupstaggingapi (>=1.38.0,<1.39.0)"] +robomaker = ["types-boto3-robomaker (>=1.38.0,<1.39.0)"] +rolesanywhere = ["types-boto3-rolesanywhere (>=1.38.0,<1.39.0)"] +route53 = ["types-boto3-route53 (>=1.38.0,<1.39.0)"] +route53-recovery-cluster = ["types-boto3-route53-recovery-cluster (>=1.38.0,<1.39.0)"] +route53-recovery-control-config = 
["types-boto3-route53-recovery-control-config (>=1.38.0,<1.39.0)"] +route53-recovery-readiness = ["types-boto3-route53-recovery-readiness (>=1.38.0,<1.39.0)"] +route53domains = ["types-boto3-route53domains (>=1.38.0,<1.39.0)"] +route53profiles = ["types-boto3-route53profiles (>=1.38.0,<1.39.0)"] +route53resolver = ["types-boto3-route53resolver (>=1.38.0,<1.39.0)"] +rum = ["types-boto3-rum (>=1.38.0,<1.39.0)"] +s3 = ["types-boto3-s3 (>=1.38.0,<1.39.0)"] +s3control = ["types-boto3-s3control (>=1.38.0,<1.39.0)"] +s3outposts = ["types-boto3-s3outposts (>=1.38.0,<1.39.0)"] +s3tables = ["types-boto3-s3tables (>=1.38.0,<1.39.0)"] +sagemaker = ["types-boto3-sagemaker (>=1.38.0,<1.39.0)"] +sagemaker-a2i-runtime = ["types-boto3-sagemaker-a2i-runtime (>=1.38.0,<1.39.0)"] +sagemaker-edge = ["types-boto3-sagemaker-edge (>=1.38.0,<1.39.0)"] +sagemaker-featurestore-runtime = ["types-boto3-sagemaker-featurestore-runtime (>=1.38.0,<1.39.0)"] +sagemaker-geospatial = ["types-boto3-sagemaker-geospatial (>=1.38.0,<1.39.0)"] +sagemaker-metrics = ["types-boto3-sagemaker-metrics (>=1.38.0,<1.39.0)"] +sagemaker-runtime = ["types-boto3-sagemaker-runtime (>=1.38.0,<1.39.0)"] +savingsplans = ["types-boto3-savingsplans (>=1.38.0,<1.39.0)"] +scheduler = ["types-boto3-scheduler (>=1.38.0,<1.39.0)"] +schemas = ["types-boto3-schemas (>=1.38.0,<1.39.0)"] +sdb = ["types-boto3-sdb (>=1.38.0,<1.39.0)"] +secretsmanager = ["types-boto3-secretsmanager (>=1.38.0,<1.39.0)"] +security-ir = ["types-boto3-security-ir (>=1.38.0,<1.39.0)"] +securityhub = ["types-boto3-securityhub (>=1.38.0,<1.39.0)"] +securitylake = ["types-boto3-securitylake (>=1.38.0,<1.39.0)"] +serverlessrepo = ["types-boto3-serverlessrepo (>=1.38.0,<1.39.0)"] +service-quotas = ["types-boto3-service-quotas (>=1.38.0,<1.39.0)"] +servicecatalog = ["types-boto3-servicecatalog (>=1.38.0,<1.39.0)"] +servicecatalog-appregistry = ["types-boto3-servicecatalog-appregistry (>=1.38.0,<1.39.0)"] +servicediscovery = ["types-boto3-servicediscovery (>=1.38.0,<1.39.0)"] +ses = ["types-boto3-ses (>=1.38.0,<1.39.0)"] +sesv2 = ["types-boto3-sesv2 (>=1.38.0,<1.39.0)"] +shield = ["types-boto3-shield (>=1.38.0,<1.39.0)"] +signer = ["types-boto3-signer (>=1.38.0,<1.39.0)"] +simspaceweaver = ["types-boto3-simspaceweaver (>=1.38.0,<1.39.0)"] +sms = ["types-boto3-sms (>=1.38.0,<1.39.0)"] +snow-device-management = ["types-boto3-snow-device-management (>=1.38.0,<1.39.0)"] +snowball = ["types-boto3-snowball (>=1.38.0,<1.39.0)"] +sns = ["types-boto3-sns (>=1.38.0,<1.39.0)"] +socialmessaging = ["types-boto3-socialmessaging (>=1.38.0,<1.39.0)"] +sqs = ["types-boto3-sqs (>=1.38.0,<1.39.0)"] +ssm = ["types-boto3-ssm (>=1.38.0,<1.39.0)"] +ssm-contacts = ["types-boto3-ssm-contacts (>=1.38.0,<1.39.0)"] +ssm-guiconnect = ["types-boto3-ssm-guiconnect (>=1.38.0,<1.39.0)"] +ssm-incidents = ["types-boto3-ssm-incidents (>=1.38.0,<1.39.0)"] +ssm-quicksetup = ["types-boto3-ssm-quicksetup (>=1.38.0,<1.39.0)"] +ssm-sap = ["types-boto3-ssm-sap (>=1.38.0,<1.39.0)"] +sso = ["types-boto3-sso (>=1.38.0,<1.39.0)"] +sso-admin = ["types-boto3-sso-admin (>=1.38.0,<1.39.0)"] +sso-oidc = ["types-boto3-sso-oidc (>=1.38.0,<1.39.0)"] +stepfunctions = ["types-boto3-stepfunctions (>=1.38.0,<1.39.0)"] +storagegateway = ["types-boto3-storagegateway (>=1.38.0,<1.39.0)"] +sts = ["types-boto3-sts (>=1.38.0,<1.39.0)"] +supplychain = ["types-boto3-supplychain (>=1.38.0,<1.39.0)"] +support = ["types-boto3-support (>=1.38.0,<1.39.0)"] +support-app = ["types-boto3-support-app (>=1.38.0,<1.39.0)"] +swf = ["types-boto3-swf 
(>=1.38.0,<1.39.0)"] +synthetics = ["types-boto3-synthetics (>=1.38.0,<1.39.0)"] +taxsettings = ["types-boto3-taxsettings (>=1.38.0,<1.39.0)"] +textract = ["types-boto3-textract (>=1.38.0,<1.39.0)"] +timestream-influxdb = ["types-boto3-timestream-influxdb (>=1.38.0,<1.39.0)"] +timestream-query = ["types-boto3-timestream-query (>=1.38.0,<1.39.0)"] +timestream-write = ["types-boto3-timestream-write (>=1.38.0,<1.39.0)"] +tnb = ["types-boto3-tnb (>=1.38.0,<1.39.0)"] +transcribe = ["types-boto3-transcribe (>=1.38.0,<1.39.0)"] +transfer = ["types-boto3-transfer (>=1.38.0,<1.39.0)"] +translate = ["types-boto3-translate (>=1.38.0,<1.39.0)"] +trustedadvisor = ["types-boto3-trustedadvisor (>=1.38.0,<1.39.0)"] +verifiedpermissions = ["types-boto3-verifiedpermissions (>=1.38.0,<1.39.0)"] +voice-id = ["types-boto3-voice-id (>=1.38.0,<1.39.0)"] +vpc-lattice = ["types-boto3-vpc-lattice (>=1.38.0,<1.39.0)"] +waf = ["types-boto3-waf (>=1.38.0,<1.39.0)"] +waf-regional = ["types-boto3-waf-regional (>=1.38.0,<1.39.0)"] +wafv2 = ["types-boto3-wafv2 (>=1.38.0,<1.39.0)"] +wellarchitected = ["types-boto3-wellarchitected (>=1.38.0,<1.39.0)"] +wisdom = ["types-boto3-wisdom (>=1.38.0,<1.39.0)"] +workdocs = ["types-boto3-workdocs (>=1.38.0,<1.39.0)"] +workmail = ["types-boto3-workmail (>=1.38.0,<1.39.0)"] +workmailmessageflow = ["types-boto3-workmailmessageflow (>=1.38.0,<1.39.0)"] +workspaces = ["types-boto3-workspaces (>=1.38.0,<1.39.0)"] +workspaces-thin-client = ["types-boto3-workspaces-thin-client (>=1.38.0,<1.39.0)"] +workspaces-web = ["types-boto3-workspaces-web (>=1.38.0,<1.39.0)"] +xray = ["types-boto3-xray (>=1.38.0,<1.39.0)"] + +[[package]] +name = "types-s3transfer" +version = "0.13.0" +description = "Type annotations and code completion for s3transfer" +optional = false +python-versions = ">=3.8" +groups = ["test"] +files = [ + {file = "types_s3transfer-0.13.0-py3-none-any.whl", hash = "sha256:79c8375cbf48a64bff7654c02df1ec4b20d74f8c5672fc13e382f593ca5565b3"}, + {file = "types_s3transfer-0.13.0.tar.gz", hash = "sha256:203dadcb9865c2f68fb44bc0440e1dc05b79197ba4a641c0976c26c9af75ef52"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1806,4 +2292,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.8.1" -content-hash = "47a0ecc167b2221602cf2ba98f0e8a96b5b472e0d92561715551df388a6e8844" +content-hash = "79fe037f83dad57d53f4d2867c4b2f24bf840ac10e84ce32f9899f1b193170e6" diff --git a/pyproject.toml b/pyproject.toml index 42726060..cf43fb93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ mysql-connector-python = "^8.4.0" [tool.poetry.group.test.dependencies] boto3 = "^1.34.111" +types-boto3 = "^1.34.111" coverage = "^7.5.1" debugpy = "^1.8.1" pydevd-pycharm = "^233.13763.5" @@ -55,6 +56,7 @@ pytest-html = "^4.1.1" pytest-html-merger = ">=0.0.10,<0.1.1" toxiproxy-python = "^0.1.1" parameterized = "^0.9.0" +tabulate = "^0.9.0" psycopg = "^3.1.19" psycopg-binary = "^3.1.19" mysql-connector-python = "^8.4.0" diff --git a/tests/integration/container/test_blue_green_deployment.py b/tests/integration/container/test_blue_green_deployment.py new file mode 100644 index 00000000..f7463a1d --- /dev/null +++ b/tests/integration/container/test_blue_green_deployment.py @@ -0,0 +1,1249 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Tuple
+
+if TYPE_CHECKING:
+    from .utils.connection_utils import ConnectionUtils
+    from .utils.test_driver import TestDriver
+
+import math
+import socket
+from collections import deque
+from dataclasses import dataclass
+from threading import Event, Thread
+from time import perf_counter_ns, sleep
+
+import boto3
+import pytest
+from tabulate import tabulate  # type: ignore
+
+from aws_advanced_python_wrapper import AwsWrapperConnection
+from aws_advanced_python_wrapper.blue_green_plugin import (BlueGreenPlugin,
+                                                           BlueGreenRole)
+from aws_advanced_python_wrapper.database_dialect import DialectCode
+from aws_advanced_python_wrapper.driver_info import DriverInfo
+from aws_advanced_python_wrapper.utils.atomic import AtomicInt
+from aws_advanced_python_wrapper.utils.concurrent import (ConcurrentDict,
+                                                          CountDownLatch)
+from aws_advanced_python_wrapper.utils.log import Logger
+from aws_advanced_python_wrapper.utils.properties import WrapperProperties
+from aws_advanced_python_wrapper.utils.rdsutils import RdsUtils
+from .utils.conditions import enable_on_deployments, enable_on_features
+from .utils.database_engine import DatabaseEngine
+from .utils.database_engine_deployment import DatabaseEngineDeployment
+from .utils.driver_helper import DriverHelper
+from .utils.rds_test_utility import RdsTestUtility
+from .utils.test_environment import TestEnvironment
+from .utils.test_environment_features import TestEnvironmentFeatures
+
+
+@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_INSTANCE])
+@enable_on_features([TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT])
+class TestBlueGreenDeployment:
+    logger = Logger(__name__)
+
+    INCLUDE_CLUSTER_ENDPOINTS = False
+    INCLUDE_WRITER_AND_READER_ONLY = False
+    TEST_CLUSTER_ID = "test-cluster-id"
+    MYSQL_BG_STATUS_QUERY = \
+        ("SELECT id, SUBSTRING_INDEX(endpoint, '.', 1) as hostId, endpoint, port, role, status, version "
+         "FROM mysql.rds_topology")
+    PG_AURORA_BG_STATUS_QUERY = \
+        ("SELECT id, SPLIT_PART(endpoint, '.', 1) as hostId, endpoint, port, role, status, version "
+         "FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver')")
+    PG_RDS_BG_STATUS_QUERY = f"SELECT * FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')"
+    results: ConcurrentDict[str, BlueGreenResults] = ConcurrentDict()
+    unhandled_exceptions: Deque[Exception] = deque()
+
+    @pytest.fixture(scope='class')
+    def test_utility(self):
+        region: str = 
TestEnvironment.get_current().get_info().get_region() + return RdsTestUtility(region) + + @pytest.fixture(scope='class') + def rds_utils(self): + return RdsUtils() + + def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, test_driver): + self.results.clear() + self.unhandled_exceptions.clear() + + iam_enabled = TestEnvironmentFeatures.IAM in test_environment.get_features() + start_time_ns = perf_counter_ns() + stop = Event() + start_latch = CountDownLatch() + finish_latch = CountDownLatch() + thread_count = 0 + thread_finish_count = 0 + threads: List[Thread] = [] + + env = TestEnvironment.get_current() + info = env.get_info() + db_name = info.get_db_name() + test_instance = env.get_writer() + topology_instances: List[str] = self.get_bg_endpoints( + test_environment, test_utility, rds_utils, info.get_bg_deployment_id()) + topology_instances_str = '\n'.join(topology_instances) + self.logger.debug(f"topology_instances: \n{topology_instances_str}") + + for host in topology_instances: + host_id = host[0:host.index(".")] + assert host_id + + self.results.put(host_id, BlueGreenResults()) + + if rds_utils.is_not_green_or_old_instance(host): + threads.append(Thread( + target=self.direct_topology_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + + threads.append(Thread( + target=self.direct_blue_connectivity_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + + threads.append(Thread( + target=self.direct_blue_idle_connectivity_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + + threads.append(Thread( + target=self.wrapper_blue_idle_connectivity_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + + threads.append(Thread( + target=self.wrapper_blue_executing_connectivity_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + + threads.append(Thread( + target=self.wrapper_blue_new_connection_monitor, + args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, + finish_latch, self.results.get(host_id)))) + thread_count += 1 + thread_finish_count += 1 + # TODO: should we increment thread_finish_count too? 
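+            # NOTE: each monitor thread counts finish_latch down exactly once in its
+            # finally block, so thread_finish_count already matches the increments above.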
+
+            threads.append(Thread(
+                target=self.blue_dns_monitor,
+                args=(host_id, host, start_latch, stop, finish_latch, self.results.get(host_id))))
+            thread_count += 1
+            thread_finish_count += 1
+
+            if rds_utils.is_green_instance(host):
+                threads.append(Thread(
+                    target=self.direct_topology_monitor,
+                    args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop,
+                          finish_latch, self.results.get(host_id))))
+                thread_count += 1
+                thread_finish_count += 1
+
+                threads.append(Thread(
+                    target=self.wrapper_green_connectivity_monitor,
+                    args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop,
+                          finish_latch, self.results.get(host_id))))
+                thread_count += 1
+                thread_finish_count += 1
+
+                threads.append(Thread(
+                    target=self.green_dns_monitor,
+                    args=(host_id, host, start_latch, stop, finish_latch, self.results.get(host_id))))
+                thread_count += 1
+                thread_finish_count += 1
+
+                if iam_enabled:
+                    rds_client = boto3.client("rds", region_name=test_environment.get_region())
+
+                    threads.append(Thread(
+                        target=self.green_iam_connectivity_monitor,
+                        args=(test_driver, conn_utils, rds_client, host_id, "BlueHostToken",
+                              rds_utils.remove_green_instance_prefix(host), host, test_instance.get_port(),
+                              db_name, start_latch, stop, finish_latch, self.results.get(host_id),
+                              self.results.get(host_id).green_direct_iam_ip_with_blue_node_connect_times, False, True)))
+                    thread_count += 1
+                    thread_finish_count += 1
+
+                    threads.append(Thread(
+                        target=self.green_iam_connectivity_monitor,
+                        args=(test_driver, conn_utils, rds_client, host_id, "GreenHostToken", host, host,
+                              test_instance.get_port(), db_name, start_latch, stop, finish_latch,
+                              self.results.get(host_id),
+                              self.results.get(host_id).green_direct_iam_ip_with_green_node_connect_times, True, False)
+                    ))
+                    thread_count += 1
+                    thread_finish_count += 1
+
+        threads.append(Thread(
+            target=self.bg_switchover_trigger,
+            args=(test_utility, info.get_bg_deployment_id(), start_latch, finish_latch, self.results)))
+        thread_count += 1
+        thread_finish_count += 1
+
+        for result in self.results.values():
+            result.start_time_ns.set(start_time_ns)
+
+        for thread in threads:
+            thread.start()
+
+        self.logger.debug("All threads started.")
+
+        finish_latch.wait_sec(6 * 60)
+        self.logger.debug("All threads completed.")
+
+        sleep(3 * 60)
+
+        self.logger.debug("Stopping all threads...")
+        stop.set()
+
+        for thread in threads:
+            thread.join(timeout=10)
+            if thread.is_alive():
+                self.logger.debug("Timed out waiting for a thread to stop running...")
+
+        self.logger.debug("Done waiting for threads to stop.")
+
+        for host_id, result in self.results.items():
+            assert result.bg_trigger_time_ns.get() > 0, \
+                f"bg_trigger_time for {host_id} was {result.bg_trigger_time_ns.get()}"
+
+        self.logger.debug("Test is over.")
+        self.print_metrics(rds_utils)
+
+        if len(self.unhandled_exceptions) > 0:
+            self.log_unhandled_exceptions()
+            pytest.fail("There were unhandled exceptions.")
+
+        self.assert_test()
+
+        self.logger.debug("Completed")
+
+    def get_bg_endpoints(
+            self,
+            test_env: TestEnvironment,
+            test_utility: RdsTestUtility,
+            rds_utils: RdsUtils,
+            bg_id: str) -> List[str]:
+        bg_deployment = test_utility.get_blue_green_deployment(bg_id)
+        if bg_deployment is None:
+            pytest.fail(f"Blue/Green deployment with ID '{bg_id}' not found.")
+
+        if test_env.get_deployment() == DatabaseEngineDeployment.MULTI_AZ_INSTANCE:
+            blue_instance = test_utility.get_rds_instance_info_by_arn(bg_deployment["Source"])
+            if 
blue_instance is None:
+                pytest.fail("Blue instance not found.")
+
+            green_instance = test_utility.get_rds_instance_info_by_arn(bg_deployment["Target"])
+            if green_instance is None:
+                pytest.fail("Green instance not found.")
+
+            return [blue_instance["Endpoint"]["Address"], green_instance["Endpoint"]["Address"]]
+
+        elif test_env.get_deployment() == DatabaseEngineDeployment.AURORA:
+            endpoints = []
+            blue_cluster = test_utility.get_cluster_by_arn(bg_deployment["Source"])
+            if blue_cluster is None:
+                pytest.fail("Blue cluster not found.")
+
+            if self.INCLUDE_CLUSTER_ENDPOINTS:
+                endpoints.append(test_env.get_database_info().get_cluster_endpoint())
+
+            instances = test_env.get_instances()
+            if self.INCLUDE_WRITER_AND_READER_ONLY:
+                endpoints.append(instances[0].get_host())
+                if len(instances) > 1:
+                    endpoints.append(instances[1].get_host())
+            else:
+                endpoints.extend([instance_info.get_host() for instance_info in instances])
+
+            green_cluster = test_utility.get_cluster_by_arn(bg_deployment["Target"])
+            if green_cluster is None:
+                pytest.fail("Green cluster not found.")
+
+            if self.INCLUDE_CLUSTER_ENDPOINTS:
+                endpoints.append(green_cluster["Endpoint"])
+
+            instance_ids = test_utility.get_instance_ids(green_cluster["Endpoint"])
+            if len(instance_ids) < 1:
+                pytest.fail("Cannot find green cluster instances.")
+
+            instance_pattern = rds_utils.get_rds_instance_host_pattern(green_cluster["Endpoint"])
+            if self.INCLUDE_WRITER_AND_READER_ONLY:
+                endpoints.append(instance_pattern.replace("?", instance_ids[0]))
+                if len(instance_ids) > 1:
+                    endpoints.append(instance_pattern.replace("?", instance_ids[1]))
+            else:
+                endpoints.extend([instance_pattern.replace("?", instance_id) for instance_id in instance_ids])
+
+            return endpoints
+        else:
+            pytest.fail(f"Unsupported blue/green engine deployment: {test_env.get_deployment()}")
+
+    # Monitor BG status changes
+    # Can terminate for itself
+    def direct_topology_monitor(
+            self,
+            test_driver: TestDriver,
+            conn_utils: ConnectionUtils,
+            host_id: str,
+            host: str,
+            port: int,
+            db: str,
+            start_latch: CountDownLatch,
+            stop: Event,
+            finish_latch: CountDownLatch,
+            results: BlueGreenResults):
+        conn = None
+        test_env = TestEnvironment.get_current()
+        engine = test_env.get_engine()
+
+        query = None
+        if engine == DatabaseEngine.MYSQL:
+            query = self.MYSQL_BG_STATUS_QUERY
+        elif engine == DatabaseEngine.PG:
+            db_deployment = test_env.get_deployment()
+            if db_deployment == DatabaseEngineDeployment.AURORA:
+                query = self.PG_AURORA_BG_STATUS_QUERY
+            elif db_deployment == DatabaseEngineDeployment.MULTI_AZ_INSTANCE:
+                query = self.PG_RDS_BG_STATUS_QUERY
+            else:
+                pytest.fail(f"Unsupported blue/green database engine deployment: {db_deployment}")
+        else:
+            pytest.fail(f"Unsupported database engine: {engine}")
+
+        try:
+            conn = self.get_direct_connection(
+                test_driver,
+                **conn_utils.get_connect_params(host=host, port=port, dbname=db),
+                **self.get_telemetry_params())
+            self.logger.debug(f"[DirectTopology @ {host_id}] Connection opened.")
+
+            sleep(1)
+
+            # Notify that this thread is ready for work
+            start_latch.count_down()
+
+            # Wait until other threads are ready to start the test
+            start_latch.wait_sec(5 * 60)
+            self.logger.debug(f"[DirectTopology @ {host_id}] Starting BG status monitoring.")
+
+            end_time_ns = perf_counter_ns() + 15 * 60 * 1_000_000_000  # 15 minutes
+            while not stop.is_set() and perf_counter_ns() < end_time_ns:
+                if conn is None:
+                    conn = self.get_direct_connection(
+                        test_driver, **conn_utils.get_connect_params(host=host, port=port, dbname=db))
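+                    # Assumption: telemetry params are only needed on the initial
+                    # connection above; this monitoring reconnect omits them.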
+                    self.logger.debug(f"[DirectTopology @ {host_id}] Connection re-opened.")
+
+                try:
+                    cursor = conn.cursor()
+                    cursor.execute(query)
+                    for record in cursor:
+                        role = record["role"]
+                        version = record["version"]
+                        status = record["status"]
+                        is_green = BlueGreenRole.parse_role(role, version) == BlueGreenRole.TARGET
+
+                        def _log_and_return_time(_) -> int:
+                            self.logger.debug(f"[DirectTopology @ {host_id}] Status changed to: {status}.")
+                            return perf_counter_ns()
+
+                        if is_green:
+                            results.green_status_time.compute_if_absent(status, _log_and_return_time)
+                        else:
+                            results.blue_status_time.compute_if_absent(status, _log_and_return_time)
+
+                    sleep(0.1)
+                except Exception as e:
+                    self.logger.debug(f"[DirectTopology @ {host_id}] Thread exception: {e}.")
+                    self.close_connection(conn)
+                    conn = None
+        except Exception as e:
+            self.logger.debug(f"[DirectTopology @ {host_id}] Thread unhandled exception: {e}.")
+            self.unhandled_exceptions.append(e)
+        finally:
+            self.close_connection(conn)
+            finish_latch.count_down()
+            self.logger.debug(f"[DirectTopology @ {host_id}] Thread is completed.")
+
+    def get_telemetry_params(self) -> Dict[str, Any]:
+        params: Dict[str, Any] = {}
+        features = TestEnvironment.get_current().get_features()
+        if TestEnvironmentFeatures.TELEMETRY_TRACES_ENABLED in features \
+                or TestEnvironmentFeatures.TELEMETRY_METRICS_ENABLED in features:
+            params[WrapperProperties.ENABLE_TELEMETRY.name] = True
+            params[WrapperProperties.TELEMETRY_SUBMIT_TOPLEVEL.name] = True
+        if TestEnvironmentFeatures.TELEMETRY_TRACES_ENABLED in features:
+            params[WrapperProperties.TELEMETRY_TRACES_BACKEND.name] = "XRAY"
+        if TestEnvironmentFeatures.TELEMETRY_METRICS_ENABLED in features:
+            params[WrapperProperties.TELEMETRY_METRICS_BACKEND.name] = "OTLP"
+
+        return params
+
+    def get_direct_connection(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection:
+        conn = None
+        connect_count = 0
+        target_driver_connect = DriverHelper.get_connect_func(test_driver)
+        while conn is None and connect_count < 10:
+            try:
+                conn = target_driver_connect(**connect_params)
+            except Exception:
+                # ignore, try to connect again
+                pass
+
+            connect_count += 1
+
+        if conn is None:
+            pytest.fail(f"Cannot connect to {connect_params.get('host')}")
+
+        return conn
+
+    def close_connection(self, conn: Optional[AwsWrapperConnection]):
+        try:
+            if conn is not None and not conn.is_closed:
+                conn.close()
+        except Exception:
+            # do nothing
+            pass
+
+    # Blue node
+    # Checking: connectivity, SELECT 1
+    # Can terminate for itself
+    def direct_blue_connectivity_monitor(
+            self,
+            test_driver: TestDriver,
+            conn_utils: ConnectionUtils,
+            host_id: str,
+            host: str,
+            port: int,
+            db: str,
+            start_latch: CountDownLatch,
+            stop: Event,
+            finish_latch: CountDownLatch,
+            results: BlueGreenResults):
+        conn = None
+        try:
+            conn = self.get_direct_connection(
+                test_driver,
+                **conn_utils.get_connect_params(host=host, port=port, dbname=db),
+                **self.get_telemetry_params())
+            self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Connection opened.")
+
+            sleep(1)
+
+            # Notify that this thread is ready for work
+            start_latch.count_down()
+
+            # Wait until other threads are ready to start the test
+            start_latch.wait_sec(5 * 60)
+            self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Starting connectivity monitoring.")
+
+            while not stop.is_set():
+                try:
+                    cursor = conn.cursor()
+                    cursor.execute("SELECT 1")
+                    sleep(1)
+                except Exception as e:
+                    self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Thread exception: {e}")
+                    results.direct_blue_lost_connection_time_ns.set(perf_counter_ns())
+                    break
+        except Exception as e:
+            self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Thread unhandled exception: {e}")
+            self.unhandled_exceptions.append(e)
+        finally:
+            self.close_connection(conn)
+            finish_latch.count_down()
+            self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Thread is completed.")
+
+    # Blue node
+    # Checking: connectivity, is_closed
+    # Can terminate for itself
+    def direct_blue_idle_connectivity_monitor(
+            self,
+            test_driver: TestDriver,
+            conn_utils: ConnectionUtils,
+            host_id: str,
+            host: str,
+            port: int,
+            db: str,
+            start_latch: CountDownLatch,
+            stop: Event,
+            finish_latch: CountDownLatch,
+            results: BlueGreenResults):
+        conn = None
+        try:
+            conn = self.get_direct_connection(
+                test_driver,
+                **conn_utils.get_connect_params(host=host, port=port, dbname=db),
+                **self.get_telemetry_params())
+            self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Connection opened.")
+
+            sleep(1)
+
+            # Notify that this thread is ready for work
+            start_latch.count_down()
+
+            # Wait until other threads are ready to start the test
+            start_latch.wait_sec(5 * 60)
+            self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Starting connectivity monitoring.")
+
+            while not stop.is_set():
+                try:
+                    if conn.is_closed:
+                        results.direct_blue_idle_lost_connection_time_ns.set(perf_counter_ns())
+                        break
+
+                    sleep(1)
+                except Exception as e:
+                    self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Thread exception: {e}")
+                    results.direct_blue_idle_lost_connection_time_ns.set(perf_counter_ns())
+                    break
+        except Exception as e:
+            self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Thread unhandled exception: {e}")
+            self.unhandled_exceptions.append(e)
+        finally:
+            self.close_connection(conn)
+            finish_latch.count_down()
+            self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Thread is completed.")
+
+    # Blue node
+    # Check: connectivity, is_closed
+    # Can terminate for itself
+    def wrapper_blue_idle_connectivity_monitor(
+            self,
+            test_driver: TestDriver,
+            conn_utils: ConnectionUtils,
+            host_id: str,
+            host: str,
+            port: int,
+            db: str,
+            start_latch: CountDownLatch,
+            stop: Event,
+            finish_latch: CountDownLatch,
+            results: BlueGreenResults):
+        conn = None
+        try:
+            connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db)
+            conn = self.get_wrapper_connection(test_driver, **connect_params)
+            self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Connection opened.")
+
+            sleep(1)
+
+            # Notify that this thread is ready for work
+            start_latch.count_down()
+
+            # Wait until other threads are ready to start the test
+            start_latch.wait_sec(5 * 60)
+            self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Starting connectivity monitoring.")
+
+            while not stop.is_set():
+                try:
+                    if conn.is_closed:
+                        results.wrapper_blue_idle_lost_connection_time_ns.set(perf_counter_ns())
+                        break
+
+                    sleep(1)
+                except Exception as e:
+                    self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Thread exception: {e}")
+                    results.wrapper_blue_idle_lost_connection_time_ns.set(perf_counter_ns())
+                    break
+        except Exception as e:
+            self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Thread unhandled exception: {e}")
+            self.unhandled_exceptions.append(e)
+        finally:
+            self.close_connection(conn)
+            finish_latch.count_down()
+            self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Thread is completed.")
+
+    def get_wrapper_connect_params(self, conn_utils: ConnectionUtils, 
host: str, port: int, db: str) -> Dict[str, Any]: + params = conn_utils.get_connect_params(host=host, port=port, dbname=db) + params = {**params, **self.get_telemetry_params()} + params[WrapperProperties.CLUSTER_ID.name] = self.TEST_CLUSTER_ID + test_env = TestEnvironment.get_current() + engine = test_env.get_engine() + db_deployment = test_env.get_deployment() + + if db_deployment == DatabaseEngineDeployment.AURORA: + if engine == DatabaseEngine.MYSQL: + params[WrapperProperties.DIALECT.name] = DialectCode.AURORA_MYSQL + elif engine == DatabaseEngine.PG: + params[WrapperProperties.DIALECT.name] = DialectCode.AURORA_PG + elif db_deployment == DatabaseEngineDeployment.MULTI_AZ_INSTANCE: + if engine == DatabaseEngine.MYSQL: + params[WrapperProperties.DIALECT.name] = DialectCode.RDS_MYSQL + elif engine == DatabaseEngine.PG: + params[WrapperProperties.DIALECT.name] = DialectCode.RDS_PG + + if TestEnvironmentFeatures.IAM in test_env.get_features(): + params[WrapperProperties.PLUGINS.name] = "bg,iam" + params[WrapperProperties.USER.name] = test_env.get_info().get_iam_user_name() + params[WrapperProperties.IAM_REGION.name] = test_env.get_info().get_region() + else: + params[WrapperProperties.PLUGINS.name] = "bg" + + return params + + def get_wrapper_connection(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: + conn = None + connect_count = 0 + target_driver_connect = DriverHelper.get_connect_func(test_driver) + while conn is None and connect_count < 10: + try: + conn = AwsWrapperConnection.connect(target_driver_connect, **connect_params) + except Exception: + # ignore, try to connect again + pass + + connect_count += 1 + + if conn is None: + pytest.fail(f"Cannot connect to {connect_params.get('host')}") + + return conn + + # Blue node + # Check: connectivity, SELECT sleep(5) + # Expect: long execution time (longer than 5s) during active phase of switchover + # Can terminate for itself + def wrapper_blue_executing_connectivity_monitor( + self, + test_driver: TestDriver, + conn_utils: ConnectionUtils, + host_id: str, + host: str, + port: int, + db: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults): + conn = None + query = None + test_env = TestEnvironment.get_current() + engine = test_env.get_engine() + if engine == DatabaseEngine.MYSQL: + query = "SELECT sleep(5)" + elif engine == DatabaseEngine.PG: + query = "SELECT pg_sleep(5)" + else: + pytest.fail(f"Unsupported database engine: {engine}") + + try: + connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) + conn = self.get_wrapper_connection(test_driver, **connect_params) + bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) + assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." 
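+            # _unwrap() returns the BlueGreenPlugin instance from the wrapper's
+            # plugin chain; the test reads bg_plugin.get_hold_time_ns() below to
+            # measure how long the plugin held traffic during switchover.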
+ self.logger.debug(f"[WrapperBlueExecute @ {host_id}] Connection opened.") + + sleep(1) + + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + self.logger.debug(f"[WrapperBlueExecute @ {host_id}] Starting connectivity monitoring.") + + while not stop.is_set(): + start_time_ns = perf_counter_ns() + try: + cursor = conn.cursor() + cursor.execute(query) + end_time_ns = perf_counter_ns() + results.blue_wrapper_execute_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) + except Exception as e: + results.blue_wrapper_execute_times.append( + TimeHolder(start_time_ns, perf_counter_ns(), bg_plugin.get_hold_time_ns(), str(e))) + if conn.is_closed: + break + + sleep(1) + except Exception as e: + self.logger.debug(f"[WrapperBlueExecute @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + self.close_connection(conn) + finish_latch.count_down() + self.logger.debug(f"[WrapperBlueExecute @ {host_id}] Thread is completed.") + + # Blue node + # Check: connectivity, opening a new connection + # Expect: longer opening connection time during active phase of switchover + # Need a stop signal to terminate + def wrapper_blue_new_connection_monitor( + self, + test_driver: TestDriver, + conn_utils: ConnectionUtils, + host_id: str, + host: str, + port: int, + db: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults): + conn = None + try: + connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) + + sleep(1) + + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Starting connectivity monitoring.") + + while not stop.is_set(): + start_time_ns = perf_counter_ns() + + try: + conn = self.get_wrapper_connection(test_driver, **connect_params) + end_time_ns = perf_counter_ns() + bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) + assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." + + results.blue_wrapper_connect_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) + except Exception as e: + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread exception: {e}") + end_time_ns = perf_counter_ns() + if conn is not None: + bg_plugin = conn._unwrap(BlueGreenPlugin) + assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." 
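+                        # A TimeHolder is recorded for failed connect attempts as
+                        # well, so switchover-induced connect delays stay visible.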
+ results.blue_wrapper_connect_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns(), str(e))) + else: + results.blue_wrapper_connect_times.append( + TimeHolder(start_time_ns, end_time_ns, error=str(e))) + + self.close_connection(conn) + conn = None + sleep(1) + + except Exception as e: + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + self.close_connection(conn) + finish_latch.count_down() + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread is completed.") + + # Blue DNS + # Check time of IP address change + # Can terminate for itself + def blue_dns_monitor( + self, + host_id: str, + host: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults): + try: + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + + original_ip = socket.gethostbyname(host) + self.logger.debug(f"[BlueDNS @ {host_id}] {host} -> {original_ip}") + + while not stop.is_set(): + sleep(1) + + try: + current_ip = socket.gethostbyname(host) + if current_ip != original_ip: + results.dns_blue_changed_time_ns.set(perf_counter_ns()) + self.logger.debug(f"[BlueDNS @ {host_id}] {host} -> {current_ip}") + break + except socket.gaierror as e: + self.logger.debug(f"[BlueDNS @ {host_id}] Error: {e}") + results.dns_blue_error = str(e) + results.dns_blue_changed_time_ns.set(perf_counter_ns()) + break + + except Exception as e: + self.logger.debug(f"[BlueDNS @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + finish_latch.count_down() + self.logger.debug(f"[BlueDNS @ {host_id}] Thread is completed.") + + # Green node + # Check: connectivity, SELECT 1 + # Expect: no interruption, execute takes longer time during BG switchover + # Can terminate for itself + def wrapper_green_connectivity_monitor( + self, + test_driver: TestDriver, + conn_utils: ConnectionUtils, + host_id: str, + host: str, + port: int, + db: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults): + conn = None + try: + connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) + conn = self.get_wrapper_connection(test_driver, **connect_params) + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Connection opened.") + + bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) + assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." + + sleep(1) + + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Starting connectivity monitoring.") + + start_time_ns = perf_counter_ns() + while not stop.is_set(): + try: + cursor = conn.cursor() + start_time_ns = perf_counter_ns() + cursor.execute("SELECT 1") + end_time_ns = perf_counter_ns() + results.green_wrapper_execute_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) + sleep(1) + except Exception as e: + # TODO: do we need to handle the query timeout scenario like JDBC does for sqlTimeoutException? 
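+                    # For now any execute error is treated as lost connectivity to
+                    # the green node; the timestamp is recorded below and the loop exits.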
+ self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread exception: {e}") + results.wrapper_green_lost_connection_time_ns.set(perf_counter_ns()) + break + except Exception as e: + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + self.close_connection(conn) + finish_latch.count_down() + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread is completed.") + + # Green node + # Check: DNS record presence + # Expect: DNS record is deleted during/after switchover + # Can terminate by itself + def green_dns_monitor( + self, + host_id: str, + host: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults): + try: + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + + ip = socket.gethostbyname(host) + self.logger.debug(f"[GreenDNS @ {host_id}] {host} -> {ip}") + + while not stop.is_set(): + sleep(1) + + try: + socket.gethostbyname(host) + except socket.gaierror: + results.dns_green_removed_time_ns.set(perf_counter_ns()) + break + + except Exception as e: + self.logger.debug(f"[GreenDNS @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + finish_latch.count_down() + self.logger.debug(f"[GreenDNS @ {host_id}] Thread is completed.") + + # Green node + # Check: connectivity (opening a new connection) with IAM when using node IP address + # Expect: lose connectivity after green node changes its name (green prefix to no prefix) + # Can terminate for itself + def green_iam_connectivity_monitor( + self, + test_driver, + conn_utils: ConnectionUtils, + rds_client, + host_id: str, + thread_prefix: str, + iam_token_host: str, + connect_host: str, + port: int, + db: str, + start_latch: CountDownLatch, + stop: Event, + finish_latch: CountDownLatch, + results: BlueGreenResults, + result_queue: Deque[TimeHolder], + notify_on_first_error: bool, + exit_on_first_success: bool): + conn = None + try: + test_env = TestEnvironment.get_current() + iam_user = test_env.get_info().get_iam_user_name() + green_ip = socket.gethostbyname(connect_host) + connect_params = conn_utils.get_connect_params(host=green_ip, port=port, user=iam_user, dbname=db) + connect_params[WrapperProperties.CONNECT_TIMEOUT_SEC.name] = 10 + connect_params[WrapperProperties.SOCKET_TIMEOUT_SEC.name] = 10 + + sleep(1) + + # Notify that this thread is ready for work + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + self.logger.debug( + f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Starting connectivity monitoring {iam_token_host}") + + while not stop.is_set(): + token = rds_client.generate_db_auth_token(DBHostname=iam_token_host, port=port, DBUsername=iam_user) + connect_params[WrapperProperties.PASSWORD.name] = token + + start_ns = perf_counter_ns() + try: + target_driver_conn = DriverHelper.get_connect_func(test_driver) + conn = target_driver_conn(**connect_params) + end_ns = perf_counter_ns() + result_queue.append(TimeHolder(start_ns, end_ns)) + + if exit_on_first_success: + results.green_node_changed_name_time_ns.compare_and_set(0, perf_counter_ns()) + self.logger.debug( + f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Successfully connected. 
Exiting thread...") + return + # TODO: do we need to handle the query timeout scenario like JDBC does for sqlTimeoutException? + except Exception as e: + self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread exception: {e}") + end_ns = perf_counter_ns() + result_queue.append(TimeHolder(start_ns, end_ns, error=str(e))) + # TODO: is 'Access Denied' the error message in Python as well as JDBC? + if notify_on_first_error and "access denied" in str(e).lower(): + results.green_node_changed_name_time_ns.compare_and_set(0, perf_counter_ns()) + self.logger.debug( + f"[DirectGreenIamIp{thread_prefix} @ {host_id}] " + f"Encountered first 'Access denied' exception. Exiting thread...") + return + + self.close_connection(conn) + conn = None + sleep(1) + + except Exception as e: + self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + self.close_connection(conn) + finish_latch.count_down() + self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread is completed.") + + # Trigger BG switchover using RDS API + # Can terminate for itself + def bg_switchover_trigger( + self, + test_utility: RdsTestUtility, + bg_id: str, + start_latch: CountDownLatch, + finish_latch: CountDownLatch, + results: Dict[str, BlueGreenResults]): + try: + start_latch.count_down() + + # Wait until other threads are ready to start the test + start_latch.wait_sec(5 * 60) + + sync_time_ns = perf_counter_ns() + for result in results.values(): + result.threads_sync_time.set(sync_time_ns) + + sleep(30) + test_utility.switchover_blue_green_deployment(bg_id) + + bg_trigger_time_ns = perf_counter_ns() + for result in results.values(): + result.bg_trigger_time_ns.set(bg_trigger_time_ns) + except Exception as e: + self.logger.debug(f"[Switchover] Thread unhandled exception: {e}") + self.unhandled_exceptions.append(e) + finally: + finish_latch.count_down() + self.logger.debug("[Switchover] Thread is completed.") + + def print_metrics(self, rds_utils: RdsUtils): + bg_trigger_time_ns = next((result.bg_trigger_time_ns.get() for result in self.results.values()), None) + assert bg_trigger_time_ns is not None, "Cannot get bg_trigger_time" + + table = [] + headers = [ + "Instance/endpoint", + "Start time", + "Threads sync", + "direct Blue conn dropped (idle)", + "direct Blue conn dropped (SELECT 1)", + "wrapper Blue conn dropped (idle)", + "wrapper Green conn dropped (SELECT 1)", + "Blue DNS updated", + "Green DNS removed", + "Green node certificate change" + ] + + def entry_green_comparator(result_entry: Tuple[str, BlueGreenResults]): + return 1 if rds_utils.is_green_instance(result_entry[0] + ".") else 0 + + def entry_name_comparator(result_entry: Tuple[str, BlueGreenResults]): + rds_utils.remove_green_instance_prefix(result_entry[0]).lower() + + sorted_entries: List[Tuple[str, BlueGreenResults]] = sorted( + self.results.items(), + key=lambda result_entry: ( + entry_green_comparator(result_entry), + entry_name_comparator(result_entry) + ) + ) + + if not sorted_entries: + table.append(["No entries"]) + + for entry in sorted_entries: + results = entry[1] + start_time_ms = (results.start_time_ns.get() - bg_trigger_time_ns) // 1_000_000 + threads_sync_time_ms = (results.threads_sync_time.get() - bg_trigger_time_ns) // 1_000_000 + direct_blue_idle_lost_connection_time_ms = ( + self.get_formatted_time_ns_to_ms(results.direct_blue_idle_lost_connection_time_ns, bg_trigger_time_ns)) + direct_blue_lost_connection_time_ms = ( + 
self.get_formatted_time_ns_to_ms(results.direct_blue_lost_connection_time_ns, bg_trigger_time_ns)) + wrapper_blue_idle_lost_connection_time_ms = ( + self.get_formatted_time_ns_to_ms(results.wrapper_blue_idle_lost_connection_time_ns, bg_trigger_time_ns)) + wrapper_green_lost_connection_time_ms = ( + self.get_formatted_time_ns_to_ms(results.wrapper_green_lost_connection_time_ns, bg_trigger_time_ns)) + dns_blue_changed_time_ms = ( + self.get_formatted_time_ns_to_ms(results.dns_blue_changed_time_ns, bg_trigger_time_ns)) + dns_green_removed_time_ms = ( + self.get_formatted_time_ns_to_ms(results.dns_green_removed_time_ns, bg_trigger_time_ns)) + green_node_changed_name_time_ms = ( + self.get_formatted_time_ns_to_ms(results.green_node_changed_name_time_ns, bg_trigger_time_ns)) + + table.append([ + entry[0], + start_time_ms, + threads_sync_time_ms, + direct_blue_idle_lost_connection_time_ms, + direct_blue_lost_connection_time_ms, + wrapper_blue_idle_lost_connection_time_ms, + wrapper_green_lost_connection_time_ms, + dns_blue_changed_time_ms, + dns_green_removed_time_ms, + green_node_changed_name_time_ms]) + + self.logger.debug(f"\n{tabulate(table, headers=headers)}") + + for entry in sorted_entries: + if not entry[1].blue_status_time and not entry[1].green_status_time: + continue + self.print_node_status_times(entry[0], entry[1], bg_trigger_time_ns) + + for entry in sorted_entries: + if not entry[1].blue_wrapper_connect_times: + continue + self.print_duration_times( + entry[0], "Wrapper connection time (ms) to Blue", + entry[1].blue_wrapper_connect_times, bg_trigger_time_ns) + + for entry in sorted_entries: + if not entry[1].green_direct_iam_ip_with_green_node_connect_times: + continue + self.print_duration_times( + entry[0], "Wrapper IAM (green token) connection time (ms) to Green", + entry[1].green_direct_iam_ip_with_green_node_connect_times, bg_trigger_time_ns) + + for entry in sorted_entries: + if not entry[1].blue_wrapper_execute_times: + continue + self.print_duration_times( + entry[0], "Wrapper execution time (ms) to Blue", + entry[1].blue_wrapper_execute_times, bg_trigger_time_ns) + + for entry in sorted_entries: + if not entry[1].green_wrapper_execute_times: + continue + self.print_duration_times( + entry[0], "Wrapper execution time (ms) to Green", + entry[1].green_wrapper_execute_times, bg_trigger_time_ns) + + def get_formatted_time_ns_to_ms(self, atomic_end_time_ns: AtomicInt, time_zero_ns: int) -> str: + return "-" if atomic_end_time_ns.get() == 0 else f"{(atomic_end_time_ns.get() - time_zero_ns) // 1_000_000} ms" + + def print_node_status_times(self, node: str, results: BlueGreenResults, time_zero_ns: int): + status_map: ConcurrentDict[str, int] = results.blue_status_time + status_map.put_all(results.green_status_time) + table = [] + headers = ["Status", "SOURCE", "TARGET"] + sorted_status_names = [k for k, v in sorted(status_map.items(), key=lambda x: x[1])] + for status in sorted_status_names: + blue_status_time_ns = results.blue_status_time.get(status) + if blue_status_time_ns: + source_time_ms_str = f"{(blue_status_time_ns - time_zero_ns) // 1_000_000} ms" + else: + source_time_ms_str = "" + + green_status_time_ns = results.green_status_time.get(status) + if green_status_time_ns: + target_time_ms_str = f"{(green_status_time_ns - time_zero_ns) // 1_000_000} ms" + else: + target_time_ms_str = "" + + table.append([status, source_time_ms_str, target_time_ms_str]) + + self.logger.debug(f"\n{node}:\n{tabulate(table, headers=headers)}") + + def print_duration_times(self, node: str, title: 
str, times: Deque[TimeHolder], time_zero_ns: int): + table = [] + headers = ["Connect at (ms)", "Connect time/duration (ms)", "Error"] + p99_ns = self.get_percentile([time.end_time_ns - time.start_time_ns for time in times], 99.0) + p99_ms = p99_ns // 1_000_000 + table.append(["p99", p99_ms, ""]) + first_connect = times[0] + table.append([ + (first_connect.start_time_ns - time_zero_ns) // 1_000_000, + (first_connect.end_time_ns - first_connect.start_time_ns) // 1_000_000, + self.get_formatted_error(first_connect.error) + ]) + + for time_holder in times: + duration_ms = (time_holder.end_time_ns - time_holder.start_time_ns) // 1_000_000 + if duration_ms > p99_ms: + table.append([ + (time_holder.start_time_ns - time_zero_ns) // 1_000_000, + (time_holder.end_time_ns - time_holder.start_time_ns) // 1_000_000, + self.get_formatted_error(time_holder.error) + ]) + + last_connect = times[-1] + table.append([ + (last_connect.start_time_ns - time_zero_ns) // 1_000_000, + (last_connect.end_time_ns - last_connect.start_time_ns) // 1_000_000, + self.get_formatted_error(last_connect.error) + ]) + + self.logger.debug(f"\n{node}: {title}\n{tabulate(table, headers=headers)}") + + def get_formatted_error(self, error: Optional[str]) -> str: + return "" if error is None else error[0:min(len(error), 100)].replace("\n", " ") + "..." + + def get_percentile(self, input_data: List[int], percentile: float): + if not input_data: + return 0 + + sorted_list = sorted(input_data) + rank = 1 if percentile == 0 else math.ceil(percentile / 100.0 * len(input_data)) + return sorted_list[rank - 1] + + def log_unhandled_exceptions(self): + for exception in self.unhandled_exceptions: + self.logger.debug(f"Unhandled exception: {exception}") + + def assert_test(self): + bg_trigger_time_ns = next((result.bg_trigger_time_ns.get() for result in self.results.values()), None) + assert bg_trigger_time_ns is not None, "Cannot get bg_trigger_time" + + max_green_node_change_time_ms = max( + (0 if result.green_node_change_name_time.get() == 0 + else (result.green_node_change_name_time.get() - bg_trigger_time_ns) // 1_000_000 + for result in self.results.values()), + default=0 + ) + self.logger.debug(f"max_green_node_change_time: {max_green_node_change_time_ms} ms") + + switchover_complete_time_ms = max( + (0 if x == 0 + else (x - bg_trigger_time_ns) // 1_000_000 + for result in self.results.values() + if result.green_status_time + for x in [result.green_status_time.get("SWITCHOVER_COMPLETED", 0)]), + default=0 + ) + self.logger.debug(f"switchoverCompleteTime: {switchover_complete_time_ms} ms") + + # Assertions + assert switchover_complete_time_ms != 0, "BG switchover hasn't completed." + assert switchover_complete_time_ms >= max_green_node_change_time_ms, "Green node changed name after SWITCHOVER_COMPLETED." 
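The get_percentile helper above uses the nearest-rank method: for a percentile p > 0 it returns the ceil(p/100 * n)-th smallest sample, so the p99 of a small batch is simply one of the largest observed values. A minimal standalone sketch of the same rule, with hypothetical latency samples (nearest_rank_percentile is an illustrative name, not part of the test):

```python
import math
from typing import List


def nearest_rank_percentile(samples: List[int], percentile: float) -> int:
    # Same rule as get_percentile above: rank 1 when percentile == 0,
    # otherwise ceil(percentile / 100 * n), 1-indexed into the sorted samples.
    if not samples:
        return 0
    ordered = sorted(samples)
    rank = 1 if percentile == 0 else math.ceil(percentile / 100.0 * len(ordered))
    return ordered[rank - 1]


durations_ns = [5, 1, 9, 3, 7]  # hypothetical samples
assert nearest_rank_percentile(durations_ns, 0) == 1
assert nearest_rank_percentile(durations_ns, 50.0) == 5  # ceil(2.5) -> 3rd smallest
assert nearest_rank_percentile(durations_ns, 99.0) == 9  # ceil(4.95) -> 5th smallest
```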
+ + +@dataclass +class TimeHolder: + start_time_ns: int + end_time_ns: int + hold_ns: int = 0 + error: Optional[str] = None + + +@dataclass +class BlueGreenResults: + start_time_ns: AtomicInt = AtomicInt() + threads_sync_time: AtomicInt = AtomicInt() + bg_trigger_time_ns: AtomicInt = AtomicInt() + direct_blue_lost_connection_time_ns: AtomicInt = AtomicInt() + direct_blue_idle_lost_connection_time_ns: AtomicInt = AtomicInt() + wrapper_blue_idle_lost_connection_time_ns: AtomicInt = AtomicInt() + wrapper_green_lost_connection_time_ns: AtomicInt = AtomicInt() + dns_blue_changed_time_ns: AtomicInt = AtomicInt() + dns_blue_error: Optional[str] = None + dns_green_removed_time_ns: AtomicInt = AtomicInt() + green_node_changed_name_time_ns: AtomicInt = AtomicInt() + blue_status_time: ConcurrentDict[str, int] = ConcurrentDict() + green_status_time: ConcurrentDict[str, int] = ConcurrentDict() + blue_wrapper_connect_times: Deque[TimeHolder] = deque() + blue_wrapper_execute_times: Deque[TimeHolder] = deque() + green_wrapper_execute_times: Deque[TimeHolder] = deque() + green_direct_iam_ip_with_blue_node_connect_times: Deque[TimeHolder] = deque() + green_direct_iam_ip_with_green_node_connect_times: Deque[TimeHolder] = deque() diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index b49bd7fe..9986bf9e 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -17,6 +17,8 @@ from contextlib import closing from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast +import botocore.exceptions + if TYPE_CHECKING: from aws_advanced_python_wrapper.pep249 import Connection from .test_database_info import TestDatabaseInfo @@ -28,7 +30,6 @@ import boto3 import pytest -from botocore.config import Config from aws_advanced_python_wrapper.driver_info import DriverInfo from aws_advanced_python_wrapper.errors import UnsupportedOperationError @@ -48,8 +49,10 @@ class RdsTestUtility: _client: Any def __init__(self, region: str, endpoint: Optional[str] = None): - config = Config(region_name=region, endpoint_url=endpoint) if endpoint else Config(region_name=region) - self._client = boto3.client('rds', config=config) + if endpoint: + self._client = boto3.client(service_name='rds', region_name=region, endpoint_url=endpoint) + else: + self._client = boto3.client(service_name='rds', region_name=region) def get_db_instance(self, instance_id: str) -> Optional[Dict[str, Any]]: filters = [{'Name': "db-instance-id", 'Values': [f"{instance_id}"]}] @@ -286,23 +289,23 @@ def get_cluster_writer_instance_id(self, cluster_id: Optional[str] = None) -> st return cast('str', m.get("DBInstanceIdentifier")) raise Exception(Messages.get_formatted("RdsTestUtility.WriterInstanceNotFound", cluster_id)) - def get_instance_ids(self) -> List[str]: + def get_instance_ids(self, host: Optional[str] = None) -> List[str]: test_environment: TestEnvironment = TestEnvironment.get_current() deployment: DatabaseEngineDeployment = test_environment.get_deployment() if DatabaseEngineDeployment.AURORA == deployment: - return self._get_aurora_instance_ids() + return self._get_aurora_instance_ids(host) elif DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment: - return self._get_multi_az_instance_ids() + return self._get_multi_az_instance_ids(host) else: raise RuntimeError("RdsTestUtility.MethodNotSupportedForDeployment", "get_instance_ids", deployment) - def _get_aurora_instance_ids(self) -> List[str]: + def 
_get_aurora_instance_ids(self, host: Optional[str] = None) -> List[str]: test_environment: TestEnvironment = TestEnvironment.get_current() engine: DatabaseEngine = test_environment.get_engine() instance_info: TestInstanceInfo = test_environment.get_writer() sql = self._get_aurora_topology_sql(engine) - with self._open_connection(instance_info) as conn, conn.cursor() as cursor: + with self._open_connection(instance_info, host) as conn, conn.cursor() as cursor: cursor.execute(sql) records = cursor.fetchall() @@ -312,7 +315,7 @@ def _get_aurora_instance_ids(self) -> List[str]: return result - def _get_multi_az_instance_ids(self) -> List[str]: + def _get_multi_az_instance_ids(self, host: Optional[str] = None) -> List[str]: test_environment: TestEnvironment = TestEnvironment.get_current() engine: DatabaseEngine = test_environment.get_engine() cluster_endpoint_instance_info: TestInstanceInfo = TestInstanceInfo({ @@ -322,7 +325,7 @@ def _get_multi_az_instance_ids(self) -> List[str]: self.logger.debug("Testing._get_multi_az_instance_ids_connecting", cluster_endpoint_instance_info.get_host()) - conn = self._open_connection(cluster_endpoint_instance_info) + conn = self._open_connection(cluster_endpoint_instance_info, host) cursor = conn.cursor() get_writer_id_query = self._get_multi_az_writer_sql(engine) cursor.execute(get_writer_id_query) @@ -350,7 +353,7 @@ def _get_multi_az_instance_ids(self) -> List[str]: return result - def _open_connection(self, instance_info: TestInstanceInfo) -> Any: + def _open_connection(self, instance_info: TestInstanceInfo, host: Optional[str] = None) -> Any: env: TestEnvironment = TestEnvironment.get_current() database_engine: DatabaseEngine = env.get_engine() @@ -358,12 +361,12 @@ def _open_connection(self, instance_info: TestInstanceInfo) -> Any: target_driver_connect = DriverHelper.get_connect_func(test_driver) + host = host if host is not None else instance_info.get_host() user = env.get_database_info().get_username() password = env.get_database_info().get_password() db = env.get_database_info().get_default_db_name() - conn_params = DriverHelper.get_connect_params( - instance_info.get_host(), instance_info.get_port(), user, password, db, test_driver) + conn_params = DriverHelper.get_connect_params(host, instance_info.get_port(), user, password, db, test_driver) conn = target_driver_connect(**conn_params, connect_timeout=10) return conn @@ -449,3 +452,47 @@ def get_aurora_engine_name(engine: DatabaseEngine): return "aurora-mysql" raise RuntimeError(Messages.get_formatted("RdsTestUtility.InvalidDatabaseEngine", engine.value)) + + def get_cluster_by_arn(self, cluster_arn: str) -> Optional[Any]: + response = self._client.describe_db_clusters(Filters=[{'Name': 'db-cluster-id', 'Values': [cluster_arn]}]) + clusters = response["DBClusters"] + if len(clusters) < 1: + return None + + return clusters[0] + + def get_rds_instance_info_by_arn(self, instance_arn: str) -> Optional[Any]: + response = self._client.describe_db_instances(Filters=[{'Name': 'db-instance-id', 'Values': [instance_arn]}]) + instances = response["DBInstances"] + if len(instances) < 1: + return None + + return instances[0] + + def get_blue_green_deployment(self, bg_id: str) -> Optional[Any]: + try: + response: Any = self._client.describe_blue_green_deployments(BlueGreenDeploymentIdentifier=bg_id) + deployments = response.get("BlueGreenDeployments") + if len(deployments) < 1: + return None + + return deployments[0] + except self._client.exceptions.BlueGreenDeploymentNotFoundFault: + return None + + def 
switchover_blue_green_deployment(self, bg_id: str):
+        try:
+            self._client.switchover_blue_green_deployment(BlueGreenDeploymentIdentifier=bg_id)
+            self.logger.debug("switchover_blue_green_deployment request sent.")
+        except botocore.exceptions.ClientError as e:
+            error_info = e.response['Error']
+            self.logger.debug(
+                f"switchover_blue_green_deployment error: code={error_info['Code']}, message={error_info['Message']}")
+
+            if error_info['Message']:
+                error_message = error_info['Message']
+            else:
+                error_message = (f"The switchover_blue_green_deployment request for the blue/green deployment with "
+                                 f"ID '{bg_id}' failed for an unspecified reason")
+
+            raise Exception(error_message)
diff --git a/tests/unit/test_dialect.py b/tests/unit/test_dialect.py
index 4830f291..34540893 100644
--- a/tests/unit/test_dialect.py
+++ b/tests/unit/test_dialect.py
@@ -122,13 +122,13 @@ def test_mysql_is_dialect(mock_conn, mock_cursor, mock_session, mysql_dialect, m
 def test_rds_mysql_is_dialect(mock_super, mock_cursor, mock_conn, rds_mysql_dialect, mock_driver_dialect):
     mock_super().is_dialect.return_value = True
 
-    records = [("some_value", "some_value"), ("some_value", "source distribution")]
-    mock_cursor.__iter__.return_value = records
+    records = ("some_value", "source distribution")
+    mock_cursor.fetchone.return_value = records
 
     assert rds_mysql_dialect.is_dialect(mock_conn, mock_driver_dialect)
 
-    records = [("some_value", "some_value"), ("some_value", "some_value")]
-    mock_cursor.__iter__.return_value = records
+    records = ("some_value", "some_value")
+    mock_cursor.fetchone.return_value = records
 
     assert not rds_mysql_dialect.is_dialect(mock_conn, mock_driver_dialect)
diff --git a/tests/unit/test_secrets_manager_plugin.py b/tests/unit/test_secrets_manager_plugin.py
index 0530cf05..3e7dc049 100644
--- a/tests/unit/test_secrets_manager_plugin.py
+++ b/tests/unit/test_secrets_manager_plugin.py
@@ -26,25 +26,16 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-from aws_advanced_python_wrapper.aws_secrets_manager_plugin import \
-    AwsSecretsManagerPlugin
-
-if TYPE_CHECKING:
-    from boto3 import Session, client
-    from aws_advanced_python_wrapper.pep249 import Connection
-    from aws_advanced_python_wrapper.database_dialect import DatabaseDialect
-    from aws_advanced_python_wrapper.plugin_service import PluginService
-
 from types import SimpleNamespace
-from typing import Callable, Dict, Tuple
+from typing import Dict, Tuple
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
 
 from botocore.exceptions import ClientError
 from parameterized import param, parameterized
 
+from aws_advanced_python_wrapper.aws_secrets_manager_plugin import \
+    AwsSecretsManagerPlugin
 from aws_advanced_python_wrapper.errors import AwsWrapperError
 from aws_advanced_python_wrapper.hostinfo import HostInfo
 from aws_advanced_python_wrapper.utils.messages import Messages
@@ -79,19 +70,15 @@ class TestAwsSecretsManagerPlugin(TestCase):
         },
         'ResponseMetadata': {
             'HTTPStatusCode': 400,
+            'RequestId': 'test-request-id',
+            'HostId': 'test-host-id',
+            'HTTPHeaders': {},
+            'RetryAttempts': 0
         }
     }, "some_operation")
 
     _secrets_cache: Dict[Tuple, SimpleNamespace] = {}
 
-    _mock_func: Callable
-    _mock_plugin_service: PluginService
-    _mock_dialect: DatabaseDialect
-    _mock_session: Session
-    _mock_client: client
-    _mock_connection: Connection
-    _pg_properties: Properties
-
     def setUp(self):
         self._mock_func = MagicMock()
         self._mock_plugin_service = MagicMock()
From f3df9acae6aa7fbbf5b5844400a5ba65f472d103 Mon Sep 17
00:00:00 2001 From: aaron-congo Date: Fri, 20 Jun 2025 14:04:26 -0700 Subject: [PATCH 19/41] Fix build error, add exception timeout check --- .../container/test_blue_green_deployment.py | 111 ++++++++++++------ 1 file changed, 77 insertions(+), 34 deletions(-) diff --git a/tests/integration/container/test_blue_green_deployment.py b/tests/integration/container/test_blue_green_deployment.py index f7463a1d..1b06b0fa 100644 --- a/tests/integration/container/test_blue_green_deployment.py +++ b/tests/integration/container/test_blue_green_deployment.py @@ -28,6 +28,9 @@ from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Tuple +import mysql.connector +import psycopg + if TYPE_CHECKING: from .utils.connection_utils import ConnectionUtils from .utils.test_driver import TestDriver @@ -35,7 +38,7 @@ import math import socket from collections import deque -from dataclasses import dataclass +from dataclasses import dataclass, field from threading import Event, Thread from time import perf_counter_ns, sleep @@ -744,7 +747,11 @@ def wrapper_blue_new_connection_monitor( results.blue_wrapper_connect_times.append( TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) except Exception as e: - self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread exception: {e}") + if self.is_timeout_exception(e): + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread timeout exception: {e}") + else: + self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread exception: {e}") + end_time_ns = perf_counter_ns() if conn is not None: bg_plugin = conn._unwrap(BlueGreenPlugin) @@ -767,6 +774,33 @@ def wrapper_blue_new_connection_monitor( finish_latch.count_down() self.logger.debug(f"[WrapperBlueNewConnection @ {host_id}] Thread is completed.") + def is_timeout_exception(self, exception: Exception) -> bool: + error_message = str(exception).lower() + timeout_keywords = [ + "timeout", "timed out", "statement timeout", + "query execution was interrupted", "canceling statement due to", + "connection timed out", "lost connection", "terminated" + ] + + # Check for timeout keywords in message + if any(keyword in error_message for keyword in timeout_keywords): + return True + + # MySQL-specific checks + if isinstance(exception, mysql.connector.Error): + # MySQL timeout error codes + timeout_error_codes = [1205, 2013, 2006] # Lock timeout, lost connection, server gone away + if hasattr(exception, 'errno') and exception.errno in timeout_error_codes: + return True + + # PostgreSQL-specific checks + if isinstance(exception, psycopg.Error): + # PostgreSQL timeout usually contains specific text + if "canceling statement due to statement timeout" in error_message: + return True + + return False + # Blue DNS # Check time of IP address change # Can terminate for itself @@ -855,10 +889,17 @@ def wrapper_green_connectivity_monitor( TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) sleep(1) except Exception as e: - # TODO: do we need to handle the query timeout scenario like JDBC does for sqlTimeoutException? 
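The is_timeout_exception classifier added above combines a keyword scan of the error message with driver-specific checks. The keyword half can be sanity-checked on its own; this sketch reproduces just that branch (looks_like_timeout is an illustrative name, and the mysql.connector and psycopg error-code checks are deliberately omitted):

```python
TIMEOUT_KEYWORDS = (
    "timeout", "timed out", "statement timeout",
    "query execution was interrupted", "canceling statement due to",
    "connection timed out", "lost connection", "terminated",
)


def looks_like_timeout(exception: Exception) -> bool:
    # Message-based branch only; the isinstance checks against
    # mysql.connector.Error (errno 1205/2013/2006) and psycopg.Error
    # are left out of this sketch.
    message = str(exception).lower()
    return any(keyword in message for keyword in TIMEOUT_KEYWORDS)


assert looks_like_timeout(Exception("Lost connection to MySQL server during query"))
assert not looks_like_timeout(Exception("Access denied for user 'someuser'"))
```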
- self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread exception: {e}") - results.wrapper_green_lost_connection_time_ns.set(perf_counter_ns()) - break + if self.is_timeout_exception(e): + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread timeout exception: {e}") + results.green_wrapper_execute_times.append( + TimeHolder(start_time_ns, perf_counter_ns(), bg_plugin.get_hold_time_ns(), str(e))) + if conn.is_closed: + results.wrapper_green_lost_connection_time_ns.set(perf_counter_ns()) + break + else: + self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread exception: {e}") + results.wrapper_green_lost_connection_time_ns.set(perf_counter_ns()) + break except Exception as e: self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread unhandled exception: {e}") self.unhandled_exceptions.append(e) @@ -962,18 +1003,20 @@ def green_iam_connectivity_monitor( self.logger.debug( f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Successfully connected. Exiting thread...") return - # TODO: do we need to handle the query timeout scenario like JDBC does for sqlTimeoutException? except Exception as e: - self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread exception: {e}") - end_ns = perf_counter_ns() - result_queue.append(TimeHolder(start_ns, end_ns, error=str(e))) - # TODO: is 'Access Denied' the error message in Python as well as JDBC? - if notify_on_first_error and "access denied" in str(e).lower(): - results.green_node_changed_name_time_ns.compare_and_set(0, perf_counter_ns()) - self.logger.debug( - f"[DirectGreenIamIp{thread_prefix} @ {host_id}] " - f"Encountered first 'Access denied' exception. Exiting thread...") - return + if self.is_timeout_exception(e): + self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread exception: {e}") + result_queue.append(TimeHolder(start_ns, perf_counter_ns(), error=str(e))) + else: + self.logger.debug(f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Thread exception: {e}") + result_queue.append(TimeHolder(start_ns, perf_counter_ns(), error=str(e))) + # TODO: is 'Access Denied' the error message in Python as well as JDBC? + if notify_on_first_error and "access denied" in str(e).lower(): + results.green_node_changed_name_time_ns.compare_and_set(0, perf_counter_ns()) + self.logger.debug( + f"[DirectGreenIamIp{thread_prefix} @ {host_id}] " + f"Encountered first 'Access denied' exception. 
Exiting thread...") + return self.close_connection(conn) conn = None @@ -1229,21 +1272,21 @@ class TimeHolder: @dataclass class BlueGreenResults: - start_time_ns: AtomicInt = AtomicInt() - threads_sync_time: AtomicInt = AtomicInt() - bg_trigger_time_ns: AtomicInt = AtomicInt() - direct_blue_lost_connection_time_ns: AtomicInt = AtomicInt() - direct_blue_idle_lost_connection_time_ns: AtomicInt = AtomicInt() - wrapper_blue_idle_lost_connection_time_ns: AtomicInt = AtomicInt() - wrapper_green_lost_connection_time_ns: AtomicInt = AtomicInt() - dns_blue_changed_time_ns: AtomicInt = AtomicInt() + start_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + threads_sync_time: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + bg_trigger_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + direct_blue_lost_connection_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + direct_blue_idle_lost_connection_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + wrapper_blue_idle_lost_connection_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + wrapper_green_lost_connection_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + dns_blue_changed_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) dns_blue_error: Optional[str] = None - dns_green_removed_time_ns: AtomicInt = AtomicInt() - green_node_changed_name_time_ns: AtomicInt = AtomicInt() - blue_status_time: ConcurrentDict[str, int] = ConcurrentDict() - green_status_time: ConcurrentDict[str, int] = ConcurrentDict() - blue_wrapper_connect_times: Deque[TimeHolder] = deque() - blue_wrapper_execute_times: Deque[TimeHolder] = deque() - green_wrapper_execute_times: Deque[TimeHolder] = deque() - green_direct_iam_ip_with_blue_node_connect_times: Deque[TimeHolder] = deque() - green_direct_iam_ip_with_green_node_connect_times: Deque[TimeHolder] = deque() + dns_green_removed_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + green_node_changed_name_time_ns: AtomicInt = field(default_factory=lambda: AtomicInt(0)) + blue_status_time: ConcurrentDict[str, int] = field(default_factory=ConcurrentDict) + green_status_time: ConcurrentDict[str, int] = field(default_factory=ConcurrentDict) + blue_wrapper_connect_times: Deque[TimeHolder] = field(default_factory=deque) + blue_wrapper_execute_times: Deque[TimeHolder] = field(default_factory=deque) + green_wrapper_execute_times: Deque[TimeHolder] = field(default_factory=deque) + green_direct_iam_ip_with_blue_node_connect_times: Deque[TimeHolder] = field(default_factory=deque) + green_direct_iam_ip_with_green_node_connect_times: Deque[TimeHolder] = field(default_factory=deque) From d22cfdbc0a402ead31b45298f6c4da2d1324de60 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Wed, 25 Jun 2025 14:19:11 -0700 Subject: [PATCH 20/41] PG test passing, max_green_node_changed_name_time 0 --- .../blue_green_plugin.py | 84 ++++++----- aws_advanced_python_wrapper/plugin_service.py | 32 +++++ ...dvanced_python_wrapper_messages.properties | 6 +- .../utils/concurrent.py | 3 + aws_advanced_python_wrapper/utils/utils.py | 8 ++ .../container/test_blue_green_deployment.py | 135 ++++++++++-------- .../container/utils/rds_test_utility.py | 13 ++ tests/integration/host/build.gradle.kts | 102 +++++++++++++ .../integration/host/TestEnvironment.java | 14 +- 9 files changed, 301 insertions(+), 96 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 
e98fb2d7..5385acff 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -14,6 +14,7 @@ from __future__ import annotations +import logging import socket from datetime import datetime from time import perf_counter_ns @@ -812,10 +813,10 @@ def __init__( self._rds_utils = RdsUtils() self._cv = Condition() - self._should_collect_ip_addresses = Event() - self._should_collect_ip_addresses.set() - self._should_collect_topology = Event() - self._should_collect_topology.set() + self.should_collect_ip_addresses = Event() + self.should_collect_ip_addresses.set() + self.should_collect_topology = Event() + self.should_collect_topology.set() self.use_ip_address = Event() self._panic_mode = Event() self._panic_mode.set() @@ -837,6 +838,7 @@ def __init__( self._connection_host_info: Optional[HostInfo] = None self._connected_ip_address: Optional[str] = None self._is_host_info_correct = Event() + self._has_started = Event() db_dialect = self._plugin_service.database_dialect if not isinstance(db_dialect, BlueGreenDialect): @@ -846,7 +848,11 @@ def __init__( self._open_connection_thread: Optional[Thread] = None self._monitor_thread = Thread(daemon=True, name="BlueGreenMonitorThread", target=self._run) - self._monitor_thread.start() + + def start(self): + if not self._has_started.is_set(): + self._has_started.set() + self._monitor_thread.start() def _run(self): try: @@ -973,16 +979,18 @@ def _collect_status(self): with conn.cursor() as cursor: cursor.execute(self._bg_dialect.blue_green_status_query) for record in cursor: - version = record["version"] + # columns: id, endpoint, port, role, status, version, update_stamp + # TODO: is the order of columns the same for all dialects? + version = record[5] if version not in BlueGreenStatusMonitor._KNOWN_VERSIONS: self._version = BlueGreenStatusMonitor._LATEST_KNOWN_VERSION logger.warning( "BlueGreenStatusMonitor.UsesVersion", self._bg_role, version, self._version) - endpoint = record["endpoint"] - port = record["port"] - bg_role = BlueGreenRole.parse_role(record["role"], self._version) - phase = BlueGreenPhase.parse_phase(record["status"]) + endpoint = record[1] + port = record[2] + bg_role = BlueGreenRole.parse_role(record[3], self._version) + phase = BlueGreenPhase.parse_phase(record[4]) if self._bg_role != bg_role: continue @@ -1018,7 +1026,7 @@ def _collect_status(self): self._version = status_info.version self._port = status_info.port - if self._should_collect_topology.is_set(): + if self.should_collect_topology.is_set(): current_host_names = {status.endpoint.lower() for status in status_entries if status.endpoint is not None and self._rds_utils.is_not_old_instance(status.endpoint)} @@ -1108,11 +1116,11 @@ def collect_topology(self): return self._current_topology = self._host_list_provider.force_refresh(conn) - if self._should_collect_topology: + if self.should_collect_topology: self._start_topology = self._current_topology current_topology_copy = self._current_topology - if current_topology_copy is not None and self._should_collect_topology: + if current_topology_copy is not None and self.should_collect_topology: self._host_names.update({host_info.host for host_info in current_topology_copy}) def _collect_ip_addresses(self): @@ -1121,31 +1129,33 @@ def _collect_ip_addresses(self): for host in self._host_names: self._current_ip_addresses_by_host.put_if_absent(host, self._get_ip_address(host)) - if self._should_collect_ip_addresses: + if self.should_collect_ip_addresses: 
self._start_ip_addresses_by_host.clear() self._start_ip_addresses_by_host.put_all(self._current_ip_addresses_by_host) def _update_ip_address_flags(self): - if self._should_collect_topology: + if self.should_collect_topology: self._all_start_topology_ip_changed = False self._all_start_topology_endpoints_removed = False self._all_topology_changed = False return - if not self._should_collect_ip_addresses: + if not self.should_collect_ip_addresses: # Check whether all hosts in start_topology resolve to new IP addresses self._all_start_topology_ip_changed = self._has_all_start_topology_ip_changed() # Check whether all hosts in start_topology no longer have IP addresses. This indicates that the start_topology # hosts can no longer be resolved because their DNS entries no longer exist. - self._all_start_topology_endpoints_removed = bool(self._start_topology) and \ - all( - self._start_ip_addresses_by_host.get(node.host) is not None and - self._current_ip_addresses_by_host.get(node.host) is None - for node in self._start_topology - ) + self._all_start_topology_endpoints_removed = ( + bool(self._start_topology) and + all( + self._start_ip_addresses_by_host.get(node.host) is not None and + self._current_ip_addresses_by_host.get(node.host) is None + for node in self._start_topology + ) + ) - if not self._should_collect_topology: + if not self.should_collect_topology: # Check whether all hosts in current_topology do not exist in start_topology start_topology_hosts = set() if self._start_topology is None else \ {host_info.host for host_info in self._start_topology} @@ -1196,7 +1206,6 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) self._props = props self._bg_id = bg_id - self._monitors: List[Optional[BlueGreenStatusMonitor]] = [None, None] self._interim_status_hashes = [0, 0] self._latest_context_hash = 0 self._interim_statuses: List[Optional[BlueGreenInterimStatus]] = [None, None] @@ -1242,7 +1251,7 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) logger.warning("BlueGreenStatusProvider.NoCurrentHostInfo", self._bg_id) return - self._monitors[BlueGreenRole.SOURCE.value] = BlueGreenStatusMonitor( + blue_monitor = BlueGreenStatusMonitor( BlueGreenRole.SOURCE, self._bg_id, current_host_info, @@ -1250,7 +1259,7 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) self._get_monitoring_props(), self._status_check_intervals_ms, self._process_interim_status) - self._monitors[BlueGreenRole.TARGET.value] = BlueGreenStatusMonitor( + green_monitor = BlueGreenStatusMonitor( BlueGreenRole.TARGET, self._bg_id, current_host_info, @@ -1259,6 +1268,11 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) self._status_check_intervals_ms, self._process_interim_status) + self._monitors: List[BlueGreenStatusMonitor] = [blue_monitor, green_monitor] + + for monitor in self._monitors: + monitor.start() + def _get_monitoring_props(self) -> Properties: monitoring_props = copy(self._props) for key in self._props.keys(): @@ -1778,14 +1792,14 @@ def _update_monitors(self): if phase == BlueGreenPhase.NOT_CREATED: for monitor in self._monitors: monitor.interval_rate = BlueGreenIntervalRate.BASELINE - monitor.collect_ip_address = False - monitor.collect_topology = False + monitor.should_collect_ip_addresses.clear() + monitor.should_collect_topology.clear() monitor.use_ip_address = False elif phase == BlueGreenPhase.CREATED: for monitor in self._monitors: monitor.interval_rate = 
BlueGreenIntervalRate.INCREASED - monitor.collect_ip_address = True - monitor.collect_topology = True + monitor.should_collect_ip_addresses.set() + monitor.should_collect_topology.set() monitor.use_ip_address = False if self._rollback: monitor.reset_collected_data() @@ -1794,14 +1808,14 @@ def _update_monitors(self): or phase == BlueGreenPhase.POST: for monitor in self._monitors: monitor.interval_rate = BlueGreenIntervalRate.HIGH - monitor.collect_ip_address = False - monitor.collect_topology = False + monitor.should_collect_ip_addresses.clear() + monitor.should_collect_topology.clear() monitor.use_ip_address = True elif phase == BlueGreenPhase.COMPLETED: for monitor in self._monitors: monitor.interval_rate = BlueGreenIntervalRate.BASELINE - monitor.collect_ip_address = False - monitor.collect_topology = False + monitor.should_collect_ip_addresses.clear() + monitor.should_collect_topology.clear() monitor.use_ip_address = False monitor.reset_collected_data() @@ -1819,7 +1833,7 @@ def _update_status_cache(self): if latest_status is not None: # Notify all waiting threads that the status has been updated. - with latest_status: + with latest_status.cv: latest_status.cv.notify_all() def _log_current_context(self): diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index be4b49d6..ff404ab1 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, ClassVar, List, Type, TypeVar +from aws_advanced_python_wrapper import LogUtils from aws_advanced_python_wrapper.aurora_initial_connection_strategy_plugin import \ AuroraInitialConnectionStrategyPluginFactory from aws_advanced_python_wrapper.blue_green_plugin import \ @@ -29,6 +30,7 @@ from aws_advanced_python_wrapper.okta_plugin import OktaAuthPluginFactory from aws_advanced_python_wrapper.states.session_state_service import ( SessionStateService, SessionStateServiceImpl) +from aws_advanced_python_wrapper.utils.utils import Utils if TYPE_CHECKING: from aws_advanced_python_wrapper.allowed_and_blocked_hosts import AllowedAndBlockedHosts @@ -422,6 +424,36 @@ def set_current_connection(self, connection: Optional[Connection], host_info: Op @property def current_host_info(self) -> Optional[HostInfo]: + if self._current_host_info is not None: + return self._current_host_info + + self._current_host_info = self._initial_connection_host_info + if self._current_host_info is not None: + logger.debug("PluginServiceImpl.SetCurrentHostInfo", self._current_host_info) + return self._current_host_info + + all_hosts = self.all_hosts + if not all_hosts: + raise AwsWrapperError(Messages.get("PluginServiceImpl.HostListEmpty")) + + self._current_host_info = ( + next((host_info for host_info in all_hosts if host_info.role == HostRole.WRITER), None)) + if self._current_host_info: + allowed_hosts = self.hosts + if not Utils.contains_url(allowed_hosts, self._current_host_info.url): + raise AwsWrapperError( + Messages.get_formatted( + "PluginServiceImpl.CurrentHostNotAllowed", + self._current_host_info.url, LogUtils.log_topology(allowed_hosts))) + else: + allowed_hosts = self.hosts + if len(allowed_hosts) > 0: + self._current_host_info = self.hosts[0] + + if self._current_host_info is None: + raise AwsWrapperError("PluginServiceImpl.CouldNotDetermineCurrentHost") + + logger.debug("PluginServiceImpl.SetCurrentHostInfo", self._current_host_info) return self._current_host_info @property diff --git 
a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index dfb7033f..fa766d32 100644 --- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties @@ -67,7 +67,7 @@ BlueGreenStatusProvider.GreenDnsRemoved=[BlueGreenStatusProvider] [bgdId: '{}'] BlueGreenStatusProvider.GreenNodeChangedName=[BlueGreenStatusProvider] Green node '{}' has changed its name to '{}'. BlueGreenStatusProvider.GreenTopologyChanged=[BlueGreenStatusProvider] [bgdId: '{}'] Green topology changed. BlueGreenStatusProvider.InterimStatus=[BlueGreenStatusProvider] [bgdId: '{}', role: {}] {} -self._plugin_service.current_host_info=[BlueGreenStatusProvider] [bgdId: '{}'] Unable to create Blue/Green monitors because information about the current host was not found. +BlueGreenStatusProvider.NoCurrentHostInfo=[BlueGreenStatusProvider] [bgdId: '{}'] Unable to create Blue/Green monitors because information about the current host was not found. BlueGreenStatusProvider.ResetContext=[BlueGreenStatusProvider] Resetting context. BlueGreenStatusProvider.Rollback=[BlueGreenStatusProvider] [bgdId: '{}'] Blue/Green deployment is in rollback mode. BlueGreenStatusProvider.SwitchoverTimeout=[BlueGreenStatusProvider] Blue/Green switchover has timed out. @@ -237,11 +237,15 @@ PluginManager.MethodInvokedAgainstOldConnection = [PluginManager] The internal c PluginManager.PipelineNone=[PluginManager] A pipeline was requested but the created pipeline evaluated to None. PluginManager.ResortedPlugins=[PluginManager] Plugins order has been rearranged. The following order is in effect: {}. +PluginServiceImpl.CouldNotDetermineCurrentHost=[PluginServiceImpl] The current host could not be determined. +PluginServiceImpl.CurrentHostNotAllowed=[PluginServiceImpl] The current host is not in the list of allowed hosts. Current host: '{}'. Allowed hosts: {} PluginServiceImpl.FailedToRetrieveHostPort=[PluginServiceImpl] Could not retrieve Host:Port for connection. {} PluginServiceImpl.FillAliasesTimeout=[PluginServiceImpl] The timeout limit was reached while querying for the current host's alias. PluginServiceImpl.GetHostRoleConnectionNone=[PluginServiceImpl] Attempted to evaluate the host role of the given connection, but could not find a non-None connection to evaluate. +PluginServiceImpl.HostListEmpty=[PluginServiceImpl] Could not determine the current host info because the current host list is empty. PluginServiceImpl.IncorrectStatusType=[PluginServiceImpl] Received an unexpected type from the status cache. An object of type {} was requested, but the object at key '{}' had a type of {}. The retrieved object was: {}. PluginServiceImpl.NonEmptyAliases=[PluginServiceImpl] fill_aliases called when HostInfo already contains the following aliases: {}. +PluginServiceImpl.SetCurrentHostInfo=[PluginServiceImpl] Set current host info to {} PluginServiceImpl.UnableToUpdateTransactionStatus=[PluginServiceImpl] Unable to update transaction status, current connection is None. PluginServiceImpl.UpdateDialectConnectionNone=[PluginServiceImpl] The plugin service attempted to update the current dialect but could not identify a connection to use. 
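The current_host_info fallback added to plugin_service.py above resolves the host in a fixed order: an explicitly set host, then the initial connection host, then the first writer in the topology (validated against the allowed host list), then the first allowed host. A condensed sketch of that order, assuming HostRole is importable from hostinfo as elsewhere in the wrapper; the real property also caches and logs the result:

```python
from typing import Optional, Sequence

from aws_advanced_python_wrapper.errors import AwsWrapperError
from aws_advanced_python_wrapper.hostinfo import HostInfo, HostRole


def resolve_current_host(current: Optional[HostInfo],
                         initial: Optional[HostInfo],
                         all_hosts: Sequence[HostInfo],
                         allowed_hosts: Sequence[HostInfo]) -> HostInfo:
    if current is not None:
        return current  # an explicitly set current host wins
    if initial is not None:
        return initial  # otherwise reuse the initial connection host
    if not all_hosts:
        raise AwsWrapperError("The current host list is empty.")
    writer = next((h for h in all_hosts if h.role == HostRole.WRITER), None)
    if writer is not None:
        # The chosen writer must also be in the allowed host list.
        if not any(h.url == writer.url for h in allowed_hosts):
            raise AwsWrapperError(f"Current host '{writer.url}' is not allowed.")
        return writer
    if allowed_hosts:
        return allowed_hosts[0]  # last resort: first allowed host
    raise AwsWrapperError("The current host could not be determined.")
```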
diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index 1c43a685..532b4ceb 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -146,6 +146,9 @@ def __init__(self, count=1): self.count = count self.condition = Condition() + def set_count(self, count: int): + self.count = count + def count_down(self): with self.condition: if self.count > 0: diff --git a/aws_advanced_python_wrapper/utils/utils.py b/aws_advanced_python_wrapper/utils/utils.py index 54fd497f..ec599a54 100644 --- a/aws_advanced_python_wrapper/utils/utils.py +++ b/aws_advanced_python_wrapper/utils/utils.py @@ -87,3 +87,11 @@ def initialize_class(full_class_name: str, *args): return getattr(m, parts[-1])(*args) except ModuleNotFoundError: return None + + @staticmethod + def contains_url(hosts: Tuple[HostInfo, ...], url: str) -> bool: + for host in hosts: + if host.url == url: + return True + + return False diff --git a/tests/integration/container/test_blue_green_deployment.py b/tests/integration/container/test_blue_green_deployment.py index 1b06b0fa..8228c654 100644 --- a/tests/integration/container/test_blue_green_deployment.py +++ b/tests/integration/container/test_blue_green_deployment.py @@ -30,8 +30,13 @@ import mysql.connector import psycopg +from mysql.connector import CMySQLConnection + +from aws_advanced_python_wrapper.mysql_driver_dialect import MySQLDriverDialect +from aws_advanced_python_wrapper.pg_driver_dialect import PgDriverDialect if TYPE_CHECKING: + from aws_advanced_python_wrapper.pep249 import Connection from .utils.connection_utils import ConnectionUtils from .utils.test_driver import TestDriver @@ -42,7 +47,6 @@ from threading import Event, Thread from time import perf_counter_ns, sleep -import boto3 import pytest from tabulate import tabulate # type: ignore @@ -55,7 +59,8 @@ from aws_advanced_python_wrapper.utils.concurrent import (ConcurrentDict, CountDownLatch) from aws_advanced_python_wrapper.utils.log import Logger -from aws_advanced_python_wrapper.utils.properties import WrapperProperties +from aws_advanced_python_wrapper.utils.properties import (Properties, + WrapperProperties) from aws_advanced_python_wrapper.utils.rdsutils import RdsUtils from .utils.conditions import enable_on_deployments, enable_on_features from .utils.database_engine import DatabaseEngine @@ -83,17 +88,18 @@ class TestBlueGreenDeployment: PG_RDS_BG_STATUS_QUERY = f"SELECT * FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" results: ConcurrentDict[str, BlueGreenResults] = ConcurrentDict() unhandled_exceptions: Deque[Exception] = deque() + mysql_dialect = MySQLDriverDialect(Properties()) + pg_dialect = PgDriverDialect(Properties()) @pytest.fixture(scope='class') def test_utility(self): - region: str = TestEnvironment.get_current().get_info().get_region() - return RdsTestUtility(region) + return RdsTestUtility.get_utility() @pytest.fixture(scope='class') def rds_utils(self): return RdsUtils() - def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, test_driver): + def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment: TestEnvironment, test_driver): self.results.clear() self.unhandled_exceptions.clear() @@ -108,7 +114,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, env = TestEnvironment.get_current() info = env.get_info() - db_name = info.get_db_name() + db_name = conn_utils.dbname test_instance 
= env.get_writer() topology_instances: List[str] = self.get_bg_endpoints( test_environment, test_utility, rds_utils, info.get_bg_deployment_id()) @@ -119,55 +125,56 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, host_id = host[0:host.index(".")] assert host_id - self.results.put(host_id, BlueGreenResults()) + bg_results = BlueGreenResults() + self.results.put(host_id, bg_results) if rds_utils.is_not_green_or_old_instance(host): threads.append(Thread( target=self.direct_topology_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.direct_blue_connectivity_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.direct_blue_idle_connectivity_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.wrapper_blue_idle_connectivity_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.wrapper_blue_executing_connectivity_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.wrapper_blue_new_connection_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 # TODO: should we increment thread_finish_count too? 
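Every monitor registered here follows the same rendezvous shape: count start_latch down once the thread is ready, block until all peers have done the same, loop until the shared stop event fires, and count finish_latch down on the way out (the set_count calls a few lines below size both latches once the final thread count is known). A stripped-down sketch of the pattern using the wrapper's CountDownLatch; the loop body is a placeholder:

```python
from threading import Event, Thread

from aws_advanced_python_wrapper.utils.concurrent import CountDownLatch


def monitor(start_latch: CountDownLatch, stop: Event, finish_latch: CountDownLatch):
    try:
        start_latch.count_down()      # announce readiness
        start_latch.wait_sec(5 * 60)  # rendezvous with the other monitors
        while not stop.is_set():
            pass                      # placeholder for the real monitoring body
    finally:
        finish_latch.count_down()     # always report completion, even on error


start_latch, finish_latch, stop = CountDownLatch(1), CountDownLatch(1), Event()
thread = Thread(target=monitor, args=(start_latch, stop, finish_latch))
thread.start()
stop.set()
thread.join()
```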
threads.append(Thread( target=self.blue_dns_monitor, - args=(host_id, host, start_latch, stop, finish_latch, self.results.get(host_id)))) + args=(host_id, host, start_latch, stop, finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 @@ -175,32 +182,32 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, threads.append(Thread( target=self.direct_topology_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.wrapper_green_connectivity_monitor, args=(test_driver, conn_utils, host_id, host, test_instance.get_port(), db_name, start_latch, stop, - finish_latch, self.results.get(host_id)))) + finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 threads.append(Thread( target=self.green_dns_monitor, - args=(host_id, host, start_latch, stop, finish_latch, self.results.get(host_id)))) + args=(host_id, host, start_latch, stop, finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 if iam_enabled: - rds_client = boto3.client("rds", region_name=test_environment.get_region()) + rds_client = test_utility.get_rds_client() threads.append(Thread( target=self.green_iam_connectivity_monitor, args=(test_driver, conn_utils, rds_client, host_id, "BlueHostToken", self.rds_utils().remove_green_instance_prefix(host), host, test_instance.get_port(), - db_name, start_latch, stop, finish_latch, self.results.get(host_id), - self.results.get(host_id).green_direct_iam_ip_with_blue_node_connect_times, False, True))) + db_name, start_latch, stop, finish_latch, bg_results, + bg_results.green_direct_iam_ip_with_blue_node_connect_times, False, True))) thread_count += 1 thread_finish_count += 1 @@ -208,8 +215,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, target=self.green_iam_connectivity_monitor, args=(test_driver, conn_utils, rds_client, host_id, "GreenHostToken", host, host, test_instance.get_port(), db_name, start_latch, stop, finish_latch, - self.results.get(host_id), - self.results.get(host_id).green_direct_iam_ip_with_green_node_connect_times, True, False) + bg_results, bg_results.green_direct_iam_ip_with_green_node_connect_times, True, False) )) thread_count += 1 thread_finish_count += 1 @@ -220,6 +226,9 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, thread_count += 1 thread_finish_count += 1 + start_latch.set_count(thread_count) + finish_latch.set_count(thread_finish_count) + for result in self.results.values(): result.start_time_ns.set(start_time_ns) @@ -237,7 +246,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment, stop.set() for thread in threads: - thread.join(timeout=10) + thread.join(timeout=30) if thread.is_alive(): self.logger.debug("Timed out waiting for a thread to stop running...") @@ -352,10 +361,9 @@ def direct_topology_monitor( pytest.fail(f"Unsupported database engine: {engine}") try: - conn = self.get_direct_connection( + conn = self.get_direct_connection_with_retry( test_driver, - **conn_utils.get_connect_params(host=host, port=port, dbname=db), - **self.get_telemetry_params()) + **conn_utils.get_connect_params(host=host, port=port, dbname=db)) self.logger.debug(f"[DirectTopology] @ {host_id}] Connection opened.") sleep(1) @@ -370,7 +378,7 @@ def direct_topology_monitor( end_time_ns = perf_counter_ns() + 15 
* 60 * 1_000_000_000 # 15 minutes while not stop.is_set() and perf_counter_ns() < end_time_ns: if conn is None: - conn = self.get_direct_connection( + conn = self.get_direct_connection_with_retry( test_driver, **conn_utils.get_connect_params(host=host, port=port, dbname=db)) self.logger.debug(f"[DirectTopology] @ {host_id}] Connection re-opened.") @@ -378,9 +386,10 @@ def direct_topology_monitor( cursor = conn.cursor() cursor.execute(query) for record in cursor: - role = record["role"] - version = record["version"] - status = record["status"] + # columns: ID, hostId, endpoint, port, role, status, version + role = record[4] + status = record[5] + version = record[6] is_green = BlueGreenRole.parse_role(role, version) == BlueGreenRole.TARGET def _log_and_return_time(_) -> int: @@ -419,15 +428,16 @@ def get_telemetry_params(self) -> Dict[str, Any]: return params - def get_direct_connection(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: + def get_direct_connection_with_retry(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: conn = None connect_count = 0 target_driver_connect = DriverHelper.get_connect_func(test_driver) while conn is None and connect_count < 10: try: conn = target_driver_connect(**connect_params) - except Exception: + except Exception as e: # ignore, try to connect again + print(f"asdf {e}") pass connect_count += 1 @@ -437,14 +447,25 @@ def get_direct_connection(self, test_driver: TestDriver, **connect_params) -> Aw return conn - def close_connection(self, conn: Optional[AwsWrapperConnection]): + def close_connection(self, conn: Optional[Connection]): try: - if conn is not None and not conn.is_closed: + if conn is not None and not self.is_closed(conn): conn.close() except Exception: # do nothing pass + def is_closed(self, conn: Connection) -> bool: + if isinstance(conn, psycopg.Connection): + return self.pg_dialect.is_closed(conn) + elif isinstance(conn, CMySQLConnection): + return self.mysql_dialect.is_closed(conn) + elif isinstance(conn, AwsWrapperConnection): + return conn.is_closed + else: + pytest.fail( + f"Unable to determine if the connection was closed because it was of an unexpected type: {conn}") + # Blue node # Checking: connectivity, SELECT 1 # Can terminate for itself @@ -462,10 +483,9 @@ def direct_blue_connectivity_monitor( results: BlueGreenResults): conn = None try: - conn = self.get_direct_connection( + conn = self.get_direct_connection_with_retry( test_driver, - **conn_utils.get_connect_params(host=host, port=port, dbname=db), - **self.get_telemetry_params()) + **conn_utils.get_connect_params(host=host, port=port, dbname=db)) self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Connection opened.") sleep(1) @@ -511,10 +531,9 @@ def direct_blue_idle_connectivity_monitor( results: BlueGreenResults): conn = None try: - conn = self.get_direct_connection( + conn = self.get_direct_connection_with_retry( test_driver, - **conn_utils.get_connect_params(host=host, port=port, dbname=db), - **self.get_telemetry_params()) + **conn_utils.get_connect_params(host=host, port=port, dbname=db)) self.logger.debug(f"[DirectBlueIdleConnectivity @ {host_id}] Connection opened.") sleep(1) @@ -528,7 +547,7 @@ def direct_blue_idle_connectivity_monitor( while not stop.is_set(): try: - if conn.is_closed: + if self.is_closed(conn): results.direct_blue_idle_lost_connection_time_ns.set(perf_counter_ns()) break @@ -563,7 +582,7 @@ def wrapper_blue_idle_connectivity_monitor( conn = None try: connect_params = 
self.get_wrapper_connect_params(conn_utils, host, port, db) - conn = self.get_wrapper_connection(test_driver, **connect_params) + conn = self.get_wrapper_connection_with_retry(test_driver, **connect_params) self.logger.debug(f"[WrapperBlueIdleConnectivity @ {host_id}] Connection opened.") sleep(1) @@ -577,7 +596,7 @@ def wrapper_blue_idle_connectivity_monitor( while not stop.is_set(): try: - if conn.is_closed: + if self.is_closed(conn): results.wrapper_blue_idle_lost_connection_time_ns.set(perf_counter_ns()) break @@ -622,15 +641,16 @@ def get_wrapper_connect_params(self, conn_utils: ConnectionUtils, host: str, por return params - def get_wrapper_connection(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: + def get_wrapper_connection_with_retry(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: conn = None connect_count = 0 target_driver_connect = DriverHelper.get_connect_func(test_driver) while conn is None and connect_count < 10: try: conn = AwsWrapperConnection.connect(target_driver_connect, **connect_params) - except Exception: + except Exception as e: # ignore, try to connect again + print(f"asdf {e}") pass connect_count += 1 @@ -668,8 +688,9 @@ def wrapper_blue_executing_connectivity_monitor( pytest.fail(f"Unsupported database engine: {engine}") try: + target_driver_connect = DriverHelper.get_connect_func(test_driver) connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) - conn = self.get_wrapper_connection(test_driver, **connect_params) + conn = AwsWrapperConnection.connect(target_driver_connect, **connect_params) bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." self.logger.debug(f"[WrapperBlueExecute @ {host_id}] Connection opened.") @@ -694,7 +715,7 @@ def wrapper_blue_executing_connectivity_monitor( except Exception as e: results.blue_wrapper_execute_times.append( TimeHolder(start_time_ns, perf_counter_ns(), bg_plugin.get_hold_time_ns(), str(e))) - if conn.is_closed: + if self.is_closed(conn): break sleep(1) @@ -724,6 +745,7 @@ def wrapper_blue_new_connection_monitor( results: BlueGreenResults): conn = None try: + target_driver_connect = DriverHelper.get_connect_func(test_driver) connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) sleep(1) @@ -739,7 +761,7 @@ def wrapper_blue_new_connection_monitor( start_time_ns = perf_counter_ns() try: - conn = self.get_wrapper_connection(test_driver, **connect_params) + conn = AwsWrapperConnection.connect(target_driver_connect, **connect_params) end_time_ns = perf_counter_ns() bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) assert bg_plugin is not None, f"Unable to find blue/green plugin in wrapper connection for {host}." 
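get_direct_connection_with_retry and get_wrapper_connection_with_retry above share one loop shape: attempt to connect up to ten times, swallow each failure, and fail the test if nothing succeeded. A generic sketch of that shape (connect_with_retry and the optional pause are illustrative; the helpers in this patch retry immediately and report failure through pytest):

```python
from time import sleep
from typing import Callable, Optional, TypeVar

T = TypeVar("T")


def connect_with_retry(connect: Callable[[], T], attempts: int = 10,
                       pause_sec: float = 0.0) -> T:
    last_error: Optional[Exception] = None
    for _ in range(attempts):
        try:
            return connect()
        except Exception as e:  # swallow and retry, as the helpers above do
            last_error = e
            if pause_sec:
                sleep(pause_sec)
    raise AssertionError(f"Unable to connect after {attempts} attempts: {last_error}")
```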
@@ -863,7 +885,7 @@ def wrapper_green_connectivity_monitor( conn = None try: connect_params = self.get_wrapper_connect_params(conn_utils, host, port, db) - conn = self.get_wrapper_connection(test_driver, **connect_params) + conn = self.get_wrapper_connection_with_retry(test_driver, **connect_params) self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Connection opened.") bg_plugin: Optional[BlueGreenPlugin] = conn._unwrap(BlueGreenPlugin) @@ -893,7 +915,7 @@ def wrapper_green_connectivity_monitor( self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread timeout exception: {e}") results.green_wrapper_execute_times.append( TimeHolder(start_time_ns, perf_counter_ns(), bg_plugin.get_hold_time_ns(), str(e))) - if conn.is_closed: + if self.is_closed(conn): results.wrapper_green_lost_connection_time_ns.set(perf_counter_ns()) break else: @@ -1239,13 +1261,13 @@ def assert_test(self): bg_trigger_time_ns = next((result.bg_trigger_time_ns.get() for result in self.results.values()), None) assert bg_trigger_time_ns is not None, "Cannot get bg_trigger_time" - max_green_node_change_time_ms = max( - (0 if result.green_node_change_name_time.get() == 0 - else (result.green_node_change_name_time.get() - bg_trigger_time_ns) // 1_000_000 + max_green_node_changed_name_time_ms = max( + (0 if result.green_node_changed_name_time_ns.get() == 0 + else (result.green_node_changed_name_time_ns.get() - bg_trigger_time_ns) // 1_000_000 for result in self.results.values()), default=0 ) - self.logger.debug(f"max_green_node_change_time: {max_green_node_change_time_ms} ms") + self.logger.debug(f"max_green_node_changed_name_time: {max_green_node_changed_name_time_ms} ms") switchover_complete_time_ms = max( (0 if x == 0 @@ -1255,11 +1277,12 @@ def assert_test(self): for x in [result.green_status_time.get("SWITCHOVER_COMPLETED", 0)]), default=0 ) - self.logger.debug(f"switchoverCompleteTime: {switchover_complete_time_ms} ms") + self.logger.debug(f"switchover_complete_time: {switchover_complete_time_ms} ms") # Assertions assert switchover_complete_time_ms != 0, "BG switchover hasn't completed." - assert switchover_complete_time_ms >= max_green_node_change_time_ms, "Green node changed name after SWITCHOVER_COMPLETED." + assert switchover_complete_time_ms >= max_green_node_changed_name_time_ms, \ + "Green node changed name after SWITCHOVER_COMPLETED." 
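
Note: the timing math in assert_test above relies on a shared convention: a stored reading of 0 means the monitored event was never observed, while any non-zero perf_counter_ns() reading is reported as milliseconds elapsed since the switchover trigger. A short sketch of that conversion, with illustrative names (not part of the patch):

def elapsed_ms_since_trigger(event_time_ns: int, trigger_time_ns: int) -> int:
    # Keep 0 as-is so "never observed" survives the max(..., default=0)
    # aggregation across the per-host monitor results.
    if event_time_ns == 0:
        return 0
    return (event_time_ns - trigger_time_ns) // 1_000_000
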
@dataclass diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index 9986bf9e..ae8886a3 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -54,6 +54,11 @@ def __init__(self, region: str, endpoint: Optional[str] = None): else: self._client = boto3.client(service_name='rds', region_name=region) + @staticmethod + def get_utility() -> RdsTestUtility: + test_info = TestEnvironment.get_current().get_info() + return RdsTestUtility(test_info.get_region(), test_info.get_rds_endpoint()) + def get_db_instance(self, instance_id: str) -> Optional[Dict[str, Any]]: filters = [{'Name': "db-instance-id", 'Values': [f"{instance_id}"]}] response = self._client.describe_db_instances(DBInstanceIdentifier=instance_id, @@ -63,6 +68,14 @@ def get_db_instance(self, instance_id: str) -> Optional[Dict[str, Any]]: return None return instances[0] + def get_rds_client(self): + test_info = TestEnvironment.get_current().get_info() + endpoint = test_info.get_rds_endpoint() + if endpoint: + return boto3.client(region=test_info.get_region(), endpoint_url=endpoint) + else: + return boto3.client(region=test_info.get_region()) + def does_db_instance_exist(self, instance_id: str) -> bool: try: instance = self.get_db_instance(instance_id) diff --git a/tests/integration/host/build.gradle.kts b/tests/integration/host/build.gradle.kts index 373e9428..7a4449ca 100644 --- a/tests/integration/host/build.gradle.kts +++ b/tests/integration/host/build.gradle.kts @@ -263,6 +263,88 @@ tasks.register("test-mysql-aurora-performance") { } } +tasks.register("test-bgd-mysql-rds-instance") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.runTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-pg-driver", "true") + systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-aurora", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-2", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} + +tasks.register("test-bgd-mysql-aurora") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.runTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-pg-driver", "true") + systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-1", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + + } +} + +tasks.register("test-bgd-pg-rds-instance") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.runTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-mysql-driver", "true") + systemProperty("exclude-mysql-engine", 
"true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-aurora", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-2", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} + +tasks.register("test-bgd-pg-aurora") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.runTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-mysql-driver", "true") + systemProperty("exclude-mysql-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-1", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + + } +} + // Debug tasks.register("debug-all-environments") { @@ -413,3 +495,23 @@ tasks.register("debug-mysql-multi-az") { systemProperty("exclude-bg", "true") } } + +tasks.register("debug-bgd-pg-aurora") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.debugTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-mysql-driver", "true") + systemProperty("exclude-mysql-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-1", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} diff --git a/tests/integration/host/src/test/java/integration/host/TestEnvironment.java b/tests/integration/host/src/test/java/integration/host/TestEnvironment.java index 61c9cf8b..06a4fa30 100644 --- a/tests/integration/host/src/test/java/integration/host/TestEnvironment.java +++ b/tests/integration/host/src/test/java/integration/host/TestEnvironment.java @@ -648,7 +648,7 @@ private static void initEnv(TestEnvironment env) { env.rdsDbName = config.rdsDbName; // "cluster-mysql" env.rdsDbDomain = config.rdsDbDomain; // "XYZ.us-west-2.rds.amazonaws.com" env.rdsEndpoint = config.rdsEndpoint; // "XYZ.us-west-2.rds.amazonaws.com" - env.info.setRdsEndpoint(env.rdsEndpoint); + env.info.setRdsEndpoint(env.rdsEndpoint); // "https://rds-int.amazon.com" env.auroraUtil = new AuroraTestUtility( @@ -796,9 +796,15 @@ private static void deAuthorizeIP(TestEnvironment env) { throw new RuntimeException(e); } } - env.auroraUtil.ec2DeauthorizesIP(env.runnerIP); - LOGGER.finest(String.format("Test runner IP %s de-authorized. Usage count: %d", - env.runnerIP, ipAddressUsageRefCount.get())); + + if (!env.reuseDb) { + env.auroraUtil.ec2DeauthorizesIP(env.runnerIP); + LOGGER.finest(String.format("Test runner IP %s de-authorized. 
Usage count: %d", + env.runnerIP, ipAddressUsageRefCount.get())); + } else { + LOGGER.finest("The IP address usage count hit 0, but the REUSE_RDS_DB was set to true, so IP " + + "de-authorization was skipped."); + } } else { LOGGER.finest("IP usage count: " + ipAddressUsageRefCount.get()); } From 312a3cd0bd34ff728a85b2e97eb12e83914f2b56 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 26 Jun 2025 15:08:18 -0700 Subject: [PATCH 21/41] Multi-AZ instance passing without IAM --- .../blue_green_plugin.py | 5 +- .../database_dialect.py | 9 +- tests/integration/container/conftest.py | 2 +- .../container/test_aurora_failover.py | 2 +- .../container/test_basic_connectivity.py | 2 +- .../container/test_blue_green_deployment.py | 87 ++++++++++--------- .../container/test_read_write_splitting.py | 4 +- .../utils/database_engine_deployment.py | 4 +- .../container/utils/rds_test_utility.py | 6 +- tests/integration/host/build.gradle.kts | 64 +++++++++++++- 10 files changed, 125 insertions(+), 60 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 5385acff..977a69a4 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -979,9 +979,8 @@ def _collect_status(self): with conn.cursor() as cursor: cursor.execute(self._bg_dialect.blue_green_status_query) for record in cursor: - # columns: id, endpoint, port, role, status, version, update_stamp - # TODO: is the order of columns the same for all dialects? - version = record[5] + # columns: version, endpoint, port, role, status + version = record[0] if version not in BlueGreenStatusMonitor._KNOWN_VERSIONS: self._version = BlueGreenStatusMonitor._LATEST_KNOWN_VERSION logger.warning( diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py index 48af73dd..91af5c09 100644 --- a/aws_advanced_python_wrapper/database_dialect.py +++ b/aws_advanced_python_wrapper/database_dialect.py @@ -269,7 +269,7 @@ def is_blue_green_status_available(self, conn: Connection) -> bool: class RdsMysqlDialect(MysqlDatabaseDialect, BlueGreenDialect): _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_MYSQL) - _BG_STATUS_QUERY = "SELECT * FROM mysql.rds_topology" + _BG_STATUS_QUERY = "SELECT version, endpoint, port, role, status FROM mysql.rds_topology" _BG_STATUS_EXISTS_QUERY = \ "SELECT 1 AS tmp FROM information_schema.tables WHERE table_schema = 'mysql' AND table_name = 'rds_topology'" @@ -323,7 +323,8 @@ class RdsPgDialect(PgDatabaseDialect, BlueGreenDialect): "WHERE name='rds.extensions'") _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_PG, DialectCode.MULTI_AZ_PG) - _BG_STATUS_QUERY = f"SELECT * FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + _BG_STATUS_QUERY = (f"SELECT version, endpoint, port, role, status " + f"FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')") _BG_STATUS_EXISTS_QUERY = "SELECT 'rds_tools.show_topology'::regproc" def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: @@ -374,7 +375,7 @@ class AuroraMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect, Blu _HOST_ID_QUERY = "SELECT @@aurora_server_id" _IS_READER_QUERY = "SELECT @@innodb_read_only" - _BG_STATUS_QUERY = "SELECT * FROM mysql.rds_topology" + _BG_STATUS_QUERY = "SELECT version, endpoint, port, role, status FROM mysql.rds_topology" _BG_STATUS_EXISTS_QUERY = \ "SELECT 1 AS tmp FROM 
information_schema.tables WHERE table_schema = 'mysql' AND table_name = 'rds_topology'" @@ -430,7 +431,7 @@ class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreen _HOST_ID_QUERY = "SELECT aurora_db_instance_identifier()" _IS_READER_QUERY = "SELECT pg_is_in_recovery()" - _BG_STATUS_QUERY = f"SELECT * FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + _BG_STATUS_QUERY = f"SELECT version, endpoint, port, role, status FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" _BG_STATUS_EXISTS_QUERY = "SELECT 'get_blue_green_fast_switchover_metadata'::regproc" @property diff --git a/tests/integration/container/conftest.py b/tests/integration/container/conftest.py index 1ebfdd7a..c438561f 100644 --- a/tests/integration/container/conftest.py +++ b/tests/integration/container/conftest.py @@ -84,7 +84,7 @@ def pytest_runtest_setup(item): ProxyHelper.enable_all_connectivity() deployment = request.get_database_engine_deployment() - if DatabaseEngineDeployment.AURORA == deployment or DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment: + if DatabaseEngineDeployment.AURORA == deployment or DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER == deployment: rds_utility = RdsTestUtility(info.get_region(), info.get_rds_endpoint()) rds_utility.wait_until_cluster_has_desired_status(info.get_db_name(), "available") diff --git a/tests/integration/container/test_aurora_failover.py b/tests/integration/container/test_aurora_failover.py index e40e18ff..71524bee 100644 --- a/tests/integration/container/test_aurora_failover.py +++ b/tests/integration/container/test_aurora_failover.py @@ -41,7 +41,7 @@ @enable_on_num_instances(min_instances=2) -@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_CLUSTER]) +@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER]) @disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, TestEnvironmentFeatures.PERFORMANCE]) diff --git a/tests/integration/container/test_basic_connectivity.py b/tests/integration/container/test_basic_connectivity.py index 77aa166a..6745eac6 100644 --- a/tests/integration/container/test_basic_connectivity.py +++ b/tests/integration/container/test_basic_connectivity.py @@ -127,7 +127,7 @@ def test_proxied_wrapper_connection_failed( assert True @enable_on_num_instances(min_instances=2) - @enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_CLUSTER]) + @enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER]) @enable_on_features([TestEnvironmentFeatures.ABORT_CONNECTION_SUPPORTED]) def test_wrapper_connection_reader_cluster_with_efm_enabled(self, test_driver: TestDriver, conn_utils): target_driver_connect = DriverHelper.get_connect_func(test_driver) diff --git a/tests/integration/container/test_blue_green_deployment.py b/tests/integration/container/test_blue_green_deployment.py index 8228c654..0c431285 100644 --- a/tests/integration/container/test_blue_green_deployment.py +++ b/tests/integration/container/test_blue_green_deployment.py @@ -26,11 +26,12 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Tuple import mysql.connector import psycopg -from mysql.connector import CMySQLConnection +from mysql.connector import CMySQLConnection, MySQLConnection from 
aws_advanced_python_wrapper.mysql_driver_dialect import MySQLDriverDialect from aws_advanced_python_wrapper.pg_driver_dialect import PgDriverDialect @@ -71,7 +72,7 @@ from .utils.test_environment_features import TestEnvironmentFeatures -@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.MULTI_AZ_INSTANCE]) +@enable_on_deployments([DatabaseEngineDeployment.AURORA, DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE]) @enable_on_features([TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT]) class TestBlueGreenDeployment: logger = Logger(__name__) @@ -85,7 +86,9 @@ class TestBlueGreenDeployment: PG_AURORA_BG_STATUS_QUERY = \ ("SELECT id, SPLIT_PART(endpoint, '.', 1) as hostId, endpoint, port, role, status, version " "FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver')") - PG_RDS_BG_STATUS_QUERY = f"SELECT * FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + PG_RDS_BG_STATUS_QUERY = \ + (f"SELECT id, SPLIT_PART(endpoint, '.', 1) as hostId, endpoint, port, role, status, version " + f"FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')") results: ConcurrentDict[str, BlueGreenResults] = ConcurrentDict() unhandled_exceptions: Deque[Exception] = deque() mysql_dialect = MySQLDriverDialect(Properties()) @@ -170,7 +173,6 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment: finish_latch, bg_results))) thread_count += 1 thread_finish_count += 1 - # TODO: should we increment thread_finish_count too? threads.append(Thread( target=self.blue_dns_monitor, @@ -240,7 +242,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment: finish_latch.wait_sec(6 * 60) self.logger.debug("All threads completed.") - sleep(3 * 60) + sleep(6 * 60) self.logger.debug("Stopping all threads...") stop.set() @@ -277,7 +279,7 @@ def get_bg_endpoints( if bg_deployment is None: pytest.fail(f"Blue/Green deployment with ID '{bg_id}' not found.") - if test_env.get_deployment() == DatabaseEngineDeployment.MULTI_AZ_INSTANCE: + if test_env.get_deployment() == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE: blue_instance = test_utility.get_rds_instance_info_by_arn(bg_deployment["Source"]) if blue_instance is None: pytest.fail("Blue instance not found.") @@ -353,7 +355,7 @@ def direct_topology_monitor( db_deployment = test_env.get_deployment() if db_deployment == DatabaseEngineDeployment.AURORA: query = self.PG_AURORA_BG_STATUS_QUERY - elif db_deployment == DatabaseEngineDeployment.MULTI_AZ_INSTANCE: + elif db_deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE: query = self.PG_RDS_BG_STATUS_QUERY else: pytest.fail(f"Unsupported blue/green database engine deployment: {db_deployment}") @@ -383,23 +385,23 @@ def direct_topology_monitor( self.logger.debug(f"[DirectTopology] @ {host_id}] Connection re-opened.") try: - cursor = conn.cursor() - cursor.execute(query) - for record in cursor: - # columns: ID, hostId, endpoint, port, role, status, version - role = record[4] - status = record[5] - version = record[6] - is_green = BlueGreenRole.parse_role(role, version) == BlueGreenRole.TARGET - - def _log_and_return_time(_) -> int: - self.logger.debug(f"[DirectTopology] @ {host_id}] Status changed to: {status}.") - return perf_counter_ns() - - if is_green: - results.green_status_time.compute_if_absent(status, _log_and_return_time) - else: - results.blue_status_time.compute_if_absent(status, _log_and_return_time) + with conn.cursor() as cursor: + cursor.execute(query) + for record in cursor: + # 
columns: id, hostid, endpoint, port, role, status, version + role = record[4] + status = record[5] + version = record[6] + is_green = BlueGreenRole.parse_role(role, version) == BlueGreenRole.TARGET + + def _log_and_return_time(_) -> int: + self.logger.debug(f"[DirectTopology] @ {host_id}] Status changed to: {status}.") + return perf_counter_ns() + + if is_green: + results.green_status_time.compute_if_absent(status, _log_and_return_time) + else: + results.blue_status_time.compute_if_absent(status, _log_and_return_time) sleep(0.1) except Exception as e: @@ -458,7 +460,7 @@ def close_connection(self, conn: Optional[Connection]): def is_closed(self, conn: Connection) -> bool: if isinstance(conn, psycopg.Connection): return self.pg_dialect.is_closed(conn) - elif isinstance(conn, CMySQLConnection): + elif isinstance(conn, CMySQLConnection) or isinstance(conn, MySQLConnection): return self.mysql_dialect.is_closed(conn) elif isinstance(conn, AwsWrapperConnection): return conn.is_closed @@ -499,9 +501,10 @@ def direct_blue_connectivity_monitor( while not stop.is_set(): try: - cursor = conn.cursor() - cursor.execute("SELECT 1") - sleep(1) + with conn.cursor() as cursor: + cursor.execute("SELECT 1") + cursor.fetchall() + sleep(1) except Exception as e: self.logger.debug(f"[DirectBlueConnectivity @ {host_id}] Thread exception: {e}") results.direct_blue_lost_connection_time_ns.set(perf_counter_ns()) @@ -626,7 +629,7 @@ def get_wrapper_connect_params(self, conn_utils: ConnectionUtils, host: str, por params[WrapperProperties.DIALECT.name] = DialectCode.AURORA_MYSQL elif engine == DatabaseEngine.PG: params[WrapperProperties.DIALECT.name] = DialectCode.AURORA_PG - elif db_deployment == DatabaseEngineDeployment.MULTI_AZ_INSTANCE: + elif db_deployment == DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE: if engine == DatabaseEngine.MYSQL: params[WrapperProperties.DIALECT.name] = DialectCode.RDS_MYSQL elif engine == DatabaseEngine.PG: @@ -707,11 +710,12 @@ def wrapper_blue_executing_connectivity_monitor( while not stop.is_set(): start_time_ns = perf_counter_ns() try: - cursor = conn.cursor() - cursor.execute(query) - end_time_ns = perf_counter_ns() - results.blue_wrapper_execute_times.append( - TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) + with conn.cursor() as cursor: + cursor.execute(query) + cursor.fetchall() + end_time_ns = perf_counter_ns() + results.blue_wrapper_execute_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) except Exception as e: results.blue_wrapper_execute_times.append( TimeHolder(start_time_ns, perf_counter_ns(), bg_plugin.get_hold_time_ns(), str(e))) @@ -903,13 +907,14 @@ def wrapper_green_connectivity_monitor( start_time_ns = perf_counter_ns() while not stop.is_set(): try: - cursor = conn.cursor() - start_time_ns = perf_counter_ns() - cursor.execute("SELECT 1") - end_time_ns = perf_counter_ns() - results.green_wrapper_execute_times.append( - TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) - sleep(1) + with conn.cursor() as cursor: + start_time_ns = perf_counter_ns() + cursor.execute("SELECT 1") + cursor.fetchall() + end_time_ns = perf_counter_ns() + results.green_wrapper_execute_times.append( + TimeHolder(start_time_ns, end_time_ns, bg_plugin.get_hold_time_ns())) + sleep(1) except Exception as e: if self.is_timeout_exception(e): self.logger.debug(f"[WrapperGreenConnectivity @ {host_id}] Thread timeout exception: {e}") diff --git a/tests/integration/container/test_read_write_splitting.py 
b/tests/integration/container/test_read_write_splitting.py index dc2d00af..de75badb 100644 --- a/tests/integration/container/test_read_write_splitting.py +++ b/tests/integration/container/test_read_write_splitting.py @@ -43,8 +43,8 @@ @enable_on_num_instances(min_instances=2) @enable_on_deployments([DatabaseEngineDeployment.AURORA, - DatabaseEngineDeployment.MULTI_AZ_CLUSTER, - DatabaseEngineDeployment.MULTI_AZ_INSTANCE]) + DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER, + DatabaseEngineDeployment.RDS_MULTI_AZ_INSTANCE]) @disable_on_features([TestEnvironmentFeatures.RUN_AUTOSCALING_TESTS_ONLY, TestEnvironmentFeatures.BLUE_GREEN_DEPLOYMENT, TestEnvironmentFeatures.PERFORMANCE]) diff --git a/tests/integration/container/utils/database_engine_deployment.py b/tests/integration/container/utils/database_engine_deployment.py index 38b70aed..c58817a3 100644 --- a/tests/integration/container/utils/database_engine_deployment.py +++ b/tests/integration/container/utils/database_engine_deployment.py @@ -18,6 +18,6 @@ class DatabaseEngineDeployment(str, Enum): DOCKER = "DOCKER" RDS = "RDS" - MULTI_AZ_CLUSTER = "MULTI_AZ_CLUSTER" - MULTI_AZ_INSTANCE = "MULTI_AZ_INSTANCE" + RDS_MULTI_AZ_CLUSTER = "RDS_MULTI_AZ_CLUSTER" + RDS_MULTI_AZ_INSTANCE = "RDS_MULTI_AZ_INSTANCE" AURORA = "AURORA" diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index ae8886a3..bf8db8bb 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -149,7 +149,7 @@ def failover_cluster_and_wait_until_writer_changed( cluster_id: Optional[str] = None, target_id: Optional[str] = None) -> None: deployment = TestEnvironment.get_current().get_deployment() - if DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment and target_id is not None: + if DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER == deployment and target_id is not None: raise Exception(Messages.get_formatted("RdsTestUtility.FailoverToTargetNotSupported", target_id, deployment)) start = perf_counter_ns() @@ -243,7 +243,7 @@ def query_instance_id( if DatabaseEngineDeployment.AURORA == database_deployment: return self._query_aurora_instance_id(conn, database_engine) - elif DatabaseEngineDeployment.MULTI_AZ_CLUSTER == database_deployment: + elif DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER == database_deployment: return self._query_multi_az_instance_id(conn, database_engine) else: raise RuntimeError(Messages.get_formatted( @@ -307,7 +307,7 @@ def get_instance_ids(self, host: Optional[str] = None) -> List[str]: deployment: DatabaseEngineDeployment = test_environment.get_deployment() if DatabaseEngineDeployment.AURORA == deployment: return self._get_aurora_instance_ids(host) - elif DatabaseEngineDeployment.MULTI_AZ_CLUSTER == deployment: + elif DatabaseEngineDeployment.RDS_MULTI_AZ_CLUSTER == deployment: return self._get_multi_az_instance_ids(host) else: raise RuntimeError("RdsTestUtility.MethodNotSupportedForDeployment", "get_instance_ids", deployment) diff --git a/tests/integration/host/build.gradle.kts b/tests/integration/host/build.gradle.kts index 7a4449ca..0d25b9a0 100644 --- a/tests/integration/host/build.gradle.kts +++ b/tests/integration/host/build.gradle.kts @@ -263,7 +263,7 @@ tasks.register("test-mysql-aurora-performance") { } } -tasks.register("test-bgd-mysql-rds-instance") { +tasks.register("test-bgd-mysql-instance") { group = "verification" filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { @@ -304,7 
+304,7 @@ tasks.register("test-bgd-mysql-aurora") { } } -tasks.register("test-bgd-pg-rds-instance") { +tasks.register("test-bgd-pg-instance") { group = "verification" filter.includeTestsMatching("integration.host.TestRunner.runTests") doFirst { @@ -515,3 +515,63 @@ tasks.register("debug-bgd-pg-aurora") { systemProperty("test-bg-only", "true") } } + +tasks.register("debug-bgd-mysql-aurora") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.debugTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-pg-driver", "true") + systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-multi-az-instance", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-1", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} + +tasks.register("debug-bgd-mysql-instance") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.debugTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-pg-driver", "true") + systemProperty("exclude-pg-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-aurora", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-2", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} + +tasks.register("debug-bgd-pg-instance") { + group = "verification" + filter.includeTestsMatching("integration.host.TestRunner.debugTests") + doFirst { + systemProperty("exclude-docker", "true") + systemProperty("exclude-performance", "true") + systemProperty("exclude-mysql-driver", "true") + systemProperty("exclude-mysql-engine", "true") + systemProperty("exclude-python-38", "true") + systemProperty("exclude-aurora", "true") + systemProperty("exclude-failover", "true") + systemProperty("exclude-secrets-manager", "true") + systemProperty("exclude-instances-2", "true") + systemProperty("exclude-instances-3", "true") + systemProperty("exclude-instances-5", "true") + systemProperty("exclude-multi-az-cluster", "true") + systemProperty("test-bg-only", "true") + } +} From b8867865c29b66163e7db0d2f212f467fa8a4ce7 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 27 Jun 2025 16:33:35 -0700 Subject: [PATCH 22/41] test_switchover passing --- .../blue_green_plugin.py | 1 - .../database_dialect.py | 3 ++- aws_advanced_python_wrapper/utils/atomic.py | 3 +++ .../utils/concurrent.py | 3 +++ .../container/test_blue_green_deployment.py | 21 +++++++++++-------- .../container/utils/rds_test_utility.py | 4 ++-- 6 files changed, 22 insertions(+), 13 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 977a69a4..92a93629 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -14,7 +14,6 @@ from __future__ import annotations -import logging import socket from datetime import datetime from time import 
perf_counter_ns diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py index 91af5c09..da60fb11 100644 --- a/aws_advanced_python_wrapper/database_dialect.py +++ b/aws_advanced_python_wrapper/database_dialect.py @@ -431,7 +431,8 @@ class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreen _HOST_ID_QUERY = "SELECT aurora_db_instance_identifier()" _IS_READER_QUERY = "SELECT pg_is_in_recovery()" - _BG_STATUS_QUERY = f"SELECT version, endpoint, port, role, status FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')" + _BG_STATUS_QUERY = (f"SELECT version, endpoint, port, role, status " + f"FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')") _BG_STATUS_EXISTS_QUERY = "SELECT 'get_blue_green_fast_switchover_metadata'::regproc" @property diff --git a/aws_advanced_python_wrapper/utils/atomic.py b/aws_advanced_python_wrapper/utils/atomic.py index 13888ad0..42d7961e 100644 --- a/aws_advanced_python_wrapper/utils/atomic.py +++ b/aws_advanced_python_wrapper/utils/atomic.py @@ -20,6 +20,9 @@ def __init__(self, initial_value: int = 0): self._value = initial_value self._lock: Lock = Lock() + def __str__(self): + return f"AtomicInt[value={self._value}]" + def get(self): with self._lock: return self._value diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index 532b4ceb..17d6fd8b 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -37,6 +37,9 @@ def __len__(self): def __contains__(self, key): return key in self._dict + def __str__(self): + return f"ConcurrentDict{str(self._dict)}" + def get(self, key: K, default_value: Optional[V] = None) -> Optional[V]: return self._dict.get(key, default_value) diff --git a/tests/integration/container/test_blue_green_deployment.py b/tests/integration/container/test_blue_green_deployment.py index 0c431285..4a26da18 100644 --- a/tests/integration/container/test_blue_green_deployment.py +++ b/tests/integration/container/test_blue_green_deployment.py @@ -26,7 +26,6 @@ from __future__ import annotations -import logging from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Tuple import mysql.connector @@ -207,7 +206,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment: threads.append(Thread( target=self.green_iam_connectivity_monitor, args=(test_driver, conn_utils, rds_client, host_id, "BlueHostToken", - self.rds_utils().remove_green_instance_prefix(host), host, test_instance.get_port(), + rds_utils.remove_green_instance_prefix(host), host, test_instance.get_port(), db_name, start_latch, stop, finish_latch, bg_results, bg_results.green_direct_iam_ip_with_blue_node_connect_times, False, True))) thread_count += 1 @@ -242,7 +241,7 @@ def test_switchover(self, conn_utils, test_utility, rds_utils, test_environment: finish_latch.wait_sec(6 * 60) self.logger.debug("All threads completed.") - sleep(6 * 60) + sleep(12 * 60) self.logger.debug("Stopping all threads...") stop.set() @@ -437,9 +436,8 @@ def get_direct_connection_with_retry(self, test_driver: TestDriver, **connect_pa while conn is None and connect_count < 10: try: conn = target_driver_connect(**connect_params) - except Exception as e: + except Exception: # ignore, try to connect again - print(f"asdf {e}") pass connect_count += 1 @@ -642,6 +640,9 @@ def get_wrapper_connect_params(self, conn_utils: 
ConnectionUtils, host: str, por else: params[WrapperProperties.PLUGINS.name] = "bg" + if engine == DatabaseEngine.MYSQL: + params["use_pure"] = False + return params def get_wrapper_connection_with_retry(self, test_driver: TestDriver, **connect_params) -> AwsWrapperConnection: @@ -651,9 +652,8 @@ def get_wrapper_connection_with_retry(self, test_driver: TestDriver, **connect_p while conn is None and connect_count < 10: try: conn = AwsWrapperConnection.connect(target_driver_connect, **connect_params) - except Exception as e: + except Exception: # ignore, try to connect again - print(f"asdf {e}") pass connect_count += 1 @@ -1002,7 +1002,10 @@ def green_iam_connectivity_monitor( green_ip = socket.gethostbyname(connect_host) connect_params = conn_utils.get_connect_params(host=green_ip, port=port, user=iam_user, dbname=db) connect_params[WrapperProperties.CONNECT_TIMEOUT_SEC.name] = 10 - connect_params[WrapperProperties.SOCKET_TIMEOUT_SEC.name] = 10 + if test_env.get_engine() == DatabaseEngine.MYSQL: + # Required to connect with IAM using the regular mysql driver + connect_params["auth_plugin"] = "mysql_clear_password" + connect_params["use_pure"] = False sleep(1) @@ -1015,7 +1018,7 @@ def green_iam_connectivity_monitor( f"[DirectGreenIamIp{thread_prefix} @ {host_id}] Starting connectivity monitoring {iam_token_host}") while not stop.is_set(): - token = rds_client.generate_db_auth_token(DBHostname=iam_token_host, port=port, DBUsername=iam_user) + token = rds_client.generate_db_auth_token(DBHostname=iam_token_host, Port=port, DBUsername=iam_user) connect_params[WrapperProperties.PASSWORD.name] = token start_ns = perf_counter_ns() diff --git a/tests/integration/container/utils/rds_test_utility.py b/tests/integration/container/utils/rds_test_utility.py index bf8db8bb..16fa13fc 100644 --- a/tests/integration/container/utils/rds_test_utility.py +++ b/tests/integration/container/utils/rds_test_utility.py @@ -72,9 +72,9 @@ def get_rds_client(self): test_info = TestEnvironment.get_current().get_info() endpoint = test_info.get_rds_endpoint() if endpoint: - return boto3.client(region=test_info.get_region(), endpoint_url=endpoint) + return boto3.client(service_name='rds', region_name=test_info.get_region(), endpoint_url=endpoint) else: - return boto3.client(region=test_info.get_region()) + return boto3.client(service_name='rds', region_name=test_info.get_region()) def does_db_instance_exist(self, instance_id: str) -> bool: try: From 835dd24e4d040705770e3f50938e26bf1d495264 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 8 Jul 2025 13:54:39 -0700 Subject: [PATCH 23/41] Cleanup --- aws_advanced_python_wrapper/plugin_service.py | 1 - .../resources/aws_advanced_python_wrapper_messages.properties | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 5e270c43..ff404ab1 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -114,7 +114,6 @@ def plugin_manager(self, value): self._plugin_manager = value -T = TypeVar('T') StatusType = TypeVar('StatusType') UnwrapType = TypeVar('UnwrapType') diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index b8761098..57a8f7e2 100644 --- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ 
b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
@@ -242,8 +242,8 @@ PluginServiceImpl.CurrentHostNotAllowed=[PluginServiceImpl] The current host is
 PluginServiceImpl.FailedToRetrieveHostPort=[PluginServiceImpl] Could not retrieve Host:Port for connection. {}
 PluginServiceImpl.FillAliasesTimeout=[PluginServiceImpl] The timeout limit was reached while querying for the current host's alias.
 PluginServiceImpl.GetHostRoleConnectionNone=[PluginServiceImpl] Attempted to evaluate the host role of the given connection, but could not find a non-None connection to evaluate.
-PluginServiceImpl.IncorrectStatusType=[PluginServiceImpl] Received an unexpected type from the status cache. An object of type {} was requested, but the object at key '{}' had a type of {}. The retrieved object was: {}.
 PluginServiceImpl.HostListEmpty=[PluginServiceImpl] Could not determine the current host info because the current host list is empty.
+PluginServiceImpl.IncorrectStatusType=[PluginServiceImpl] Received an unexpected type from the status cache. An object of type {} was requested, but the object at key '{}' had a type of {}. The retrieved object was: {}.
 PluginServiceImpl.NonEmptyAliases=[PluginServiceImpl] fill_aliases called when HostInfo already contains the following aliases: {}.
 PluginServiceImpl.SetCurrentHostInfo=[PluginServiceImpl] Set current host info to {}
 PluginServiceImpl.UnableToUpdateTransactionStatus=[PluginServiceImpl] Unable to update transaction status, current connection is None.

From fcbe35a3de81e2832b0a1808b64338fc5f60fb1e Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Thu, 10 Jul 2025 14:56:41 -0700
Subject: [PATCH 24/41] PR suggestions

---
 .../blue_green_plugin.py | 162 +++++++++---------
 .../database_dialect.py | 4 +-
 ...dvanced_python_wrapper_messages.properties | 12 +-
 .../utils/properties.py | 2 +-
 4 files changed, 90 insertions(+), 90 deletions(-)

diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py
index 92a93629..b9a542f7 100644
--- a/aws_advanced_python_wrapper/blue_green_plugin.py
+++ b/aws_advanced_python_wrapper/blue_green_plugin.py
@@ -67,9 +67,9 @@ class BlueGreenIntervalRate(Enum):
 class BlueGreenPhase(Enum):
     NOT_CREATED = (0, False)
     CREATED = (1, False)
-    PREPARATION = (2, True)  # nodes are accessible
-    IN_PROGRESS = (3, True)  # active phase; nodes are not accessible
-    POST = (4, True)  # nodes are accessible; some change are still in progress
+    PREPARATION = (2, True)  # hosts are accessible
+    IN_PROGRESS = (3, True)  # active phase; hosts are not accessible
+    POST = (4, True)  # hosts are accessible; some changes are still in progress
     COMPLETED = (5, True)  # all changes are completed

     def __new__(cls, value: int, is_switchover_active_or_completed: bool):
@@ -130,7 +130,7 @@ def __init__(
             connect_routings: Optional[List[ConnectRouting]] = None,
             execute_routings: Optional[List[ExecuteRouting]] = None,
             role_by_host: Optional[ConcurrentDict[str, BlueGreenRole]] = None,
-            corresponding_nodes: Optional[ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]]] =
ConcurrentDict() - if corresponding_nodes is not None: - self.corresponding_nodes.put_all(corresponding_nodes) + self.corresponding_hosts: ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]] = ConcurrentDict() + if corresponding_hosts is not None: + self.corresponding_hosts.put_all(corresponding_hosts) self.cv = Condition() @@ -483,7 +483,7 @@ def apply( return None -class SuspendUntilCorrespondingNodeFoundConnectRouting(BaseRouting, ConnectRouting): +class SuspendUntilCorrespondingHostFoundConnectRouting(BaseRouting, ConnectRouting): _TELEMETRY_SWITCHOVER: ClassVar[str] = "Blue/Green switchover" _SLEEP_TIME_MS = 100 @@ -503,14 +503,14 @@ def apply( is_initial_connection: bool, connect_func: Callable, plugin_service: PluginService) -> Optional[Connection]: - logger.debug("SuspendConnectRouting.WaitConnectUntilCorrespondingNodeFound", host_info.host) + logger.debug("SuspendConnectRouting.WaitConnectUntilCorrespondingHostFound", host_info.host) telemetry_factory = plugin_service.get_telemetry_factory() telemetry_context = telemetry_factory.open_telemetry_context( - SuspendUntilCorrespondingNodeFoundConnectRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED) + SuspendUntilCorrespondingHostFoundConnectRouting._TELEMETRY_SWITCHOVER, TelemetryTraceLevel.NESTED) bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) - corresponding_pair = None if bg_status is None else bg_status.corresponding_nodes.get(host_info.host) + corresponding_pair = None if bg_status is None else bg_status.corresponding_hosts.get(host_info.host) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() @@ -521,28 +521,28 @@ def apply( bg_status is not None and \ bg_status.phase != BlueGreenPhase.COMPLETED and \ (corresponding_pair is None or corresponding_pair[1] is None): - # wait until the corresponding node is found, or until switchover is completed + # wait until the corresponding host is found, or until switchover is completed self.delay( - SuspendUntilCorrespondingNodeFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) + SuspendUntilCorrespondingHostFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) - corresponding_pair = None if bg_status is None else bg_status.corresponding_nodes.get(host_info.host) + corresponding_pair = None if bg_status is None else bg_status.corresponding_hosts.get(host_info.host) if bg_status is None or bg_status.phase == BlueGreenPhase.COMPLETED: logger.debug( - "SuspendUntilCorrespondingNodeFoundConnectRouting.CompletedContinueWithConnect", + "SuspendUntilCorrespondingHostFoundConnectRouting.CompletedContinueWithConnect", (time.time() - start_time_sec) / 1000) return None if time.time() > end_time_sec: raise TimeoutError( Messages.get_formatted( - "SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeNotFoundTryConnectLater", + "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostNotFoundTryConnectLater", host_info.host, (time.time() - start_time_sec) / 1000)) logger.debug( Messages.get_formatted( - "SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeFoundContinueWithConnect", + "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostFoundContinueWithConnect", host_info.host, (time.time() - start_time_sec) / 1000)) finally: @@ -1042,7 +1042,7 @@ def _collect_status(self): self._close_connection() self._panic_mode.set() else: - # We are already connected to the 
right node. + # We are already connected to the right host. self._is_host_info_correct = True self._panic_mode.clear() @@ -1147,9 +1147,9 @@ def _update_ip_address_flags(self): self._all_start_topology_endpoints_removed = ( bool(self._start_topology) and all( - self._start_ip_addresses_by_host.get(node.host) is not None and - self._current_ip_addresses_by_host.get(node.host) is None - for node in self._start_topology + self._start_ip_addresses_by_host.get(host_info.host) is not None and + self._current_ip_addresses_by_host.get(host_info.host) is None + for host_info in self._start_topology ) ) @@ -1161,7 +1161,7 @@ def _update_ip_address_flags(self): self._all_topology_changed = ( current_topology_copy and start_topology_hosts and - all(node.host not in start_topology_hosts for node in current_topology_copy)) + all(host_info.host not in start_topology_hosts for host_info in current_topology_copy)) def _has_all_start_topology_ip_changed(self) -> bool: if not self._start_topology: @@ -1208,19 +1208,19 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) self._latest_context_hash = 0 self._interim_statuses: List[Optional[BlueGreenInterimStatus]] = [None, None] self._host_ip_addresses: ConcurrentDict[str, ValueContainer[str]] = ConcurrentDict() - # The second element of the Tuple is None when no corresponding node is found. - self._corresponding_nodes: ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]] = ConcurrentDict() + # The second element of the Tuple is None when no corresponding host is found. + self._corresponding_hosts: ConcurrentDict[str, Tuple[HostInfo, Optional[HostInfo]]] = ConcurrentDict() # Keys are host URLs (port excluded) self._roles_by_host: ConcurrentDict[str, BlueGreenRole] = ConcurrentDict() self._iam_auth_success_hosts: ConcurrentDict[str, ConcurrentSet[str]] = ConcurrentDict() - self._green_node_name_change_times: ConcurrentDict[str, datetime] = ConcurrentDict() + self._green_host_name_change_times: ConcurrentDict[str, datetime] = ConcurrentDict() self._summary_status: Optional[BlueGreenStatus] = None self._latest_phase = BlueGreenPhase.NOT_CREATED self._rollback = False self._blue_dns_update_completed = False self._green_dns_removed = False self._green_topology_changed = False - self._all_green_nodes_changed_name = False + self._all_green_hosts_changed_name = False self._post_status_end_time_ns = 0 self._process_status_lock = RLock() self._status_check_intervals_ms: Dict[BlueGreenIntervalRate, int] = {} @@ -1307,7 +1307,7 @@ def _process_interim_status(self, bg_role: BlueGreenRole, interim_status: BlueGr # Update role_by_host based on the provided host names. 
self._roles_by_host.put_all({host_name.lower(): bg_role for host_name in interim_status.host_names}) - self._update_corresponding_nodes() + self._update_corresponding_hosts() self._update_summary_status(bg_role, interim_status) self._update_monitors() self._update_status_cache() @@ -1316,7 +1316,7 @@ def _process_interim_status(self, bg_role: BlueGreenRole, interim_status: BlueGr self._reset_context_when_completed() def _get_context_hash(self) -> int: - result = self._get_value_hash(1, str(self._all_green_nodes_changed_name)) + result = self._get_value_hash(1, str(self._all_green_hosts_changed_name)) result = self._get_value_hash(result, str(len(self._iam_auth_success_hosts))) return result @@ -1343,13 +1343,13 @@ def _update_phase(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterim if interim_status.phase.value >= self._latest_phase.value: self._latest_phase = interim_status.phase - def _update_corresponding_nodes(self): + def _update_corresponding_hosts(self): """ - Update corresponding nodes. The blue writer node is mapped to the green writer node, and each blue reader node is - mapped to a green reader node + Update corresponding hosts. The blue writer host is mapped to the green writer host, and each blue reader host is + mapped to a green reader host """ - self._corresponding_nodes.clear() + self._corresponding_hosts.clear() source_status = self._interim_statuses[BlueGreenRole.SOURCE.value] target_status = self._interim_statuses[BlueGreenRole.TARGET.value] if source_status is None or target_status is None: @@ -1363,16 +1363,16 @@ def _update_corresponding_nodes(self): if blue_writer_host_info is not None: # green_writer_host_info may be None, but that will be handled properly by the corresponding routing. - self._corresponding_nodes.put( + self._corresponding_hosts.put( blue_writer_host_info.host, (blue_writer_host_info, green_writer_host_info)) if sorted_blue_readers: - # Map blue readers to green nodes + # Map blue readers to green hosts if sorted_green_readers: # Map each to blue reader to a green reader. green_index = 0 for blue_host_info in sorted_blue_readers: - self._corresponding_nodes.put( + self._corresponding_hosts.put( blue_host_info.host, (blue_host_info, sorted_green_readers[green_index])) green_index += 1 # The modulo operation prevents us from exceeding the bounds of sorted_green_readers if there are @@ -1380,9 +1380,9 @@ def _update_corresponding_nodes(self): # same green reader. green_index %= len(sorted_green_readers) else: - # There's no green readers - map all blue reader nodes to the green writer + # There's no green readers - map all blue reader hosts to the green writer for blue_host_info in sorted_blue_readers: - self._corresponding_nodes.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) + self._corresponding_hosts.put(blue_host_info.host, (blue_host_info, green_writer_host_info)) if source_status.host_names and target_status.host_names: blue_hosts = source_status.host_names @@ -1396,7 +1396,7 @@ def _update_corresponding_nodes(self): (green_host for green_host in green_hosts if self._rds_utils.is_writer_cluster_dns(green_host)), None) if blue_cluster_host and green_cluster_host: - self._corresponding_nodes.put_if_absent( + self._corresponding_hosts.put_if_absent( blue_cluster_host, (HostInfo(host=blue_cluster_host), HostInfo(host=green_cluster_host))) # Map blue reader cluster host to green reader cluster host. 
@@ -1407,7 +1407,7 @@ def _update_corresponding_nodes(self): (green_host for green_host in green_hosts if self._rds_utils.is_reader_cluster_dns(green_host)), None) if blue_reader_cluster_host and green_reader_cluster_host: - self._corresponding_nodes.put_if_absent( + self._corresponding_hosts.put_if_absent( blue_reader_cluster_host, (HostInfo(host=blue_reader_cluster_host), HostInfo(host=green_reader_cluster_host))) @@ -1429,7 +1429,7 @@ def _update_corresponding_nodes(self): ) if corresponding_green_host: - self._corresponding_nodes.put_if_absent( + self._corresponding_hosts.put_if_absent( blue_host, (HostInfo(blue_host), HostInfo(corresponding_green_host))) def _get_writer_host(self, bg_role: BlueGreenRole) -> Optional[HostInfo]: @@ -1505,7 +1505,7 @@ def _start_switchover_timer(self): def _get_status_of_created(self) -> BlueGreenStatus: """ - New connect requests: go to blue or green nodes; default behaviour; no routing. + New connect requests: go to blue or green hosts; default behaviour; no routing. Existing connections: default behaviour; no action. Execute JDBC calls: default behaviour; no action. """ @@ -1515,7 +1515,7 @@ def _get_status_of_created(self) -> BlueGreenStatus: [], [], self._roles_by_host, - self._corresponding_nodes + self._corresponding_hosts ) def _get_status_of_preparation(self): @@ -1540,7 +1540,7 @@ def _get_status_of_preparation(self): connect_routings, [], self._roles_by_host, - self._corresponding_nodes + self._corresponding_hosts ) def _is_switchover_timer_expired(self) -> bool: @@ -1549,11 +1549,11 @@ def _is_switchover_timer_expired(self) -> bool: def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: connect_routings: List[ConnectRouting] = [] for host, role in self._roles_by_host.items(): - node_pair = self._corresponding_nodes.get(host) - if role == BlueGreenRole.TARGET or node_pair is None: + host_pair = self._corresponding_hosts.get(host) + if role == BlueGreenRole.TARGET or host_pair is None: continue - blue_host_info = node_pair[0] + blue_host_info = host_pair[0] blue_ip = self._host_ip_addresses.get(blue_host_info.host) if blue_ip is None or not blue_ip.is_present(): blue_ip_host_info = blue_host_info @@ -1603,7 +1603,7 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: if address_container.is_present()} for ip_address in ip_addresses: if self._suspend_blue_connections_when_in_progress: - # Check if the IP address belongs to one of the blue nodes. + # Check if the IP address belongs to one of the blue hosts. interim_status = self._interim_statuses[BlueGreenRole.SOURCE.value] if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): host_connect_routing = SuspendConnectRouting(ip_address, None, self._bg_id) @@ -1612,7 +1612,7 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: connect_routings.extend([host_connect_routing, host_port_connect_routing]) continue - # Check if the IP address belongs to one of the green nodes. + # Check if the IP address belongs to one of the green hosts. 
interim_status = self._interim_statuses[BlueGreenRole.TARGET.value] if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): host_connect_routing = SuspendConnectRouting(ip_address, None, self._bg_id) @@ -1626,9 +1626,9 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: SuspendExecuteRouting(None, BlueGreenRole.SOURCE, self._bg_id), SuspendExecuteRouting(None, BlueGreenRole.TARGET, self._bg_id)] - # All traffic through connections with IP addresses that belong to blue or green nodes should be suspended. + # All traffic through connections with IP addresses that belong to blue or green hosts should be suspended. for ip_address in ip_addresses: - # Check if the IP address belongs to one of the blue nodes. + # Check if the IP address belongs to one of the blue hosts. interim_status = self._interim_statuses[BlueGreenRole.SOURCE.value] if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): host_execute_routing = SuspendExecuteRouting(ip_address, None, self._bg_id) @@ -1637,7 +1637,7 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: execute_routings.extend([host_execute_routing, host_port_execute_routing]) continue - # Check if the IP address belongs to one of the green nodes. + # Check if the IP address belongs to one of the green hosts. interim_status = self._interim_statuses[BlueGreenRole.TARGET.value] if interim_status is not None and self._interim_status_contains_ip_address(interim_status, ip_address): host_execute_routing = SuspendExecuteRouting(ip_address, None, self._bg_id) @@ -1654,7 +1654,7 @@ def _get_status_of_in_progress(self) -> BlueGreenStatus: connect_routings, execute_routings, self._roles_by_host, - self._corresponding_nodes + self._corresponding_hosts ) def _interim_status_contains_ip_address(self, interim_status: BlueGreenInterimStatus, ip_address: str) -> bool: @@ -1677,35 +1677,35 @@ def _get_status_of_post(self) -> BlueGreenStatus: self._get_post_status_connect_routings(), [], self._roles_by_host, - self._corresponding_nodes + self._corresponding_hosts ) def _get_post_status_connect_routings(self) -> List[ConnectRouting]: - if self._blue_dns_update_completed and self._all_green_nodes_changed_name: + if self._blue_dns_update_completed and self._all_green_hosts_changed_name: return [] if self._green_dns_removed else [RejectConnectRouting(None, BlueGreenRole.TARGET)] routings: List[ConnectRouting] = [] - # New connect calls to blue nodes should be routed to green nodes + # New connect calls to blue hosts should be routed to green hosts for host, role in self._roles_by_host.items(): - if role != BlueGreenRole.SOURCE or host not in self._corresponding_nodes.keys(): + if role != BlueGreenRole.SOURCE or host not in self._corresponding_hosts.keys(): continue blue_host = host is_blue_host_instance = self._rds_utils.is_rds_instance(blue_host) - node_pair = self._corresponding_nodes.get(blue_host) - blue_host_info = None if node_pair is None else node_pair[0] - green_host_info = None if node_pair is None else node_pair[1] + host_pair = self._corresponding_hosts.get(blue_host) + blue_host_info = None if host_pair is None else host_pair[0] + green_host_info = None if host_pair is None else host_pair[1] if green_host_info is None: - # The corresponding green node was not found. We need to suspend the connection request. - host_suspend_routing = SuspendUntilCorrespondingNodeFoundConnectRouting(blue_host, role, self._bg_id) + # The corresponding green host was not found. 
We need to suspend the connection request. + host_suspend_routing = SuspendUntilCorrespondingHostFoundConnectRouting(blue_host, role, self._bg_id) interim_status = self._interim_statuses[role.value] if interim_status is None: continue host_and_port = self._get_host_and_port(blue_host, interim_status.port) host_port_suspend_routing = ( - SuspendUntilCorrespondingNodeFoundConnectRouting(host_and_port, None, self._bg_id)) + SuspendUntilCorrespondingHostFoundConnectRouting(host_and_port, None, self._bg_id)) routings.extend([host_suspend_routing, host_port_suspend_routing]) else: green_host = green_host_info.host @@ -1718,10 +1718,10 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: # Check whether the green host has already been connected a non-prefixed blue IAM host name. if self._is_already_successfully_connected(green_host, blue_host): - # Green node has already changed its name, and it's not a new non-prefixed blue node. + # Green host has already changed its name, and it's not a new non-prefixed blue host. iam_hosts: Optional[Tuple[HostInfo, ...]] = None if blue_host_info is None else (blue_host_info,) else: - # The green node has not yet changed ist name, so we need to try both possible IAM hosts. + # The green host has not yet changed ist name, so we need to try both possible IAM hosts. iam_hosts = (green_host_info,) if blue_host_info is None else (green_host_info, blue_host_info) iam_auth_success_handler = None if is_blue_host_instance \ @@ -1753,8 +1753,8 @@ def _register_iam_host(self, connect_host: str, iam_host: str): if connect_host != iam_host: if success_hosts is not None and iam_host in success_hosts: - self._green_node_name_change_times.compute_if_absent(connect_host, lambda _: datetime.now()) - logger.debug("BlueGreenStatusProvider.GreenNodeChangedName", connect_host, iam_host) + self._green_host_name_change_times.compute_if_absent(connect_host, lambda _: datetime.now()) + logger.debug("BlueGreenStatusProvider.GreenHostChangedName", connect_host, iam_host) success_hosts.add(iam_host) if connect_host != iam_host: @@ -1765,10 +1765,10 @@ def _register_iam_host(self, connect_host: str, iam_host: str): if iam_hosts # Filter out empty sets ) - if all_hosts_changed_names and not self._all_green_nodes_changed_name: - logger.debug("BlueGreenStatusProvider.AllGreenNodesChangedName") - self._all_green_nodes_changed_name = True - self._store_event_phase_time("Green node certificates changed") + if all_hosts_changed_names and not self._all_green_hosts_changed_name: + logger.debug("BlueGreenStatusProvider.AllGreenHostsChangedName") + self._all_green_hosts_changed_name = True + self._store_event_phase_time("Green host certificates changed") def _get_status_of_completed(self) -> BlueGreenStatus: if self._is_switchover_timer_expired(): @@ -1777,7 +1777,7 @@ def _get_status_of_completed(self) -> BlueGreenStatus: return self._get_status_of_created() return BlueGreenStatus( - self._bg_id, BlueGreenPhase.COMPLETED, [], [], self._roles_by_host, self._corresponding_nodes) + self._bg_id, BlueGreenPhase.COMPLETED, [], [], self._roles_by_host, self._corresponding_hosts) if not self._blue_dns_update_completed or not self._green_dns_removed: return self._get_status_of_post() @@ -1836,21 +1836,21 @@ def _update_status_cache(self): def _log_current_context(self): logger.debug(f"[bg_id: '{self._bg_id}'] Summary status: \n{self._summary_status}") - nodes_str = "\n".join( - f" {blue_host} -> {node_pair[1] if node_pair else None}" - for blue_host, node_pair in 
self._corresponding_nodes.items()) - logger.debug(f"Corresponding nodes:\n{nodes_str}") + hosts_str = "\n".join( + f" {blue_host} -> {host_pair[1] if host_pair else None}" + for blue_host, host_pair in self._corresponding_hosts.items()) + logger.debug(f"Corresponding hosts:\n{hosts_str}") phase_times = \ "\n".join(f" {event_desc} -> {info.date_time}" for event_desc, info in self._phase_times_ns.items()) logger.debug(f"Phase times:\n{phase_times}") change_name_times = \ - "\n".join(f" {host} -> {date_time}" for host, date_time in self._green_node_name_change_times.items()) - logger.debug(f"Green node certificate change times:\n{change_name_times}") + "\n".join(f" {host} -> {date_time}" for host, date_time in self._green_host_name_change_times.items()) + logger.debug(f"Green host certificate change times:\n{change_name_times}") logger.debug("\n" f" latest_status_phase: {self._latest_phase}\n" f" blue_dns_update_completed: {self._blue_dns_update_completed}\n" f" green_dns_removed: {self._green_dns_removed}\n" - f" all_green_nodes_changed_name: {self._all_green_nodes_changed_name}\n" + f" all_green_hosts_changed_name: {self._all_green_hosts_changed_name}\n" f" green_topology_changed: {self._green_topology_changed}\n") def _log_switchover_final_summary(self): @@ -1897,16 +1897,16 @@ def _reset_context_when_completed(self): self._blue_dns_update_completed = False self._green_dns_removed = False self._green_topology_changed = False - self._all_green_nodes_changed_name = False + self._all_green_hosts_changed_name = False self._post_status_end_time_ns = 0 self._interim_status_hashes = [0, 0] self._latest_context_hash = 0 self._interim_statuses = [None, None] self._host_ip_addresses.clear() - self._corresponding_nodes.clear() + self._corresponding_hosts.clear() self._roles_by_host.clear() self._iam_auth_success_hosts.clear() - self._green_node_name_change_times.clear() + self._green_host_name_change_times.clear() @dataclass diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py index da60fb11..9084e7b2 100644 --- a/aws_advanced_python_wrapper/database_dialect.py +++ b/aws_advanced_python_wrapper/database_dialect.py @@ -324,7 +324,7 @@ class RdsPgDialect(PgDatabaseDialect, BlueGreenDialect): _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_PG, DialectCode.MULTI_AZ_PG) _BG_STATUS_QUERY = (f"SELECT version, endpoint, port, role, status " - f"FROM rds_tools.show_topology('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')") + f"FROM rds_tools.show_topology('aws_advanced_python_wrapper-{DriverInfo.DRIVER_VERSION}')") _BG_STATUS_EXISTS_QUERY = "SELECT 'rds_tools.show_topology'::regproc" def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool: @@ -432,7 +432,7 @@ class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreen _IS_READER_QUERY = "SELECT pg_is_in_recovery()" _BG_STATUS_QUERY = (f"SELECT version, endpoint, port, role, status " - f"FROM get_blue_green_fast_switchover_metadata('aws_jdbc_driver-{DriverInfo.DRIVER_VERSION}')") + f"FROM get_blue_green_fast_switchover_metadata('aws_advanced_python_wrapper-{DriverInfo.DRIVER_VERSION}')") _BG_STATUS_EXISTS_QUERY = "SELECT 'get_blue_green_fast_switchover_metadata'::regproc" @property diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index 57a8f7e2..fc6c428d 100644 --- 
a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties @@ -64,7 +64,7 @@ BlueGreenStatusMonitor.UsesVersion=[BlueGreenStatusMonitor] [{}] Blue/Green depl BlueGreenStatusProvider.BlueDnsCompleted=[BlueGreenStatusProvider] [bgdId: '{}'] Blue DNS update completed. BlueGreenStatusProvider.GreenDnsRemoved=[BlueGreenStatusProvider] [bgdId: '{}'] Green DNS removed. -BlueGreenStatusProvider.GreenNodeChangedName=[BlueGreenStatusProvider] Green node '{}' has changed its name to '{}'. +BlueGreenStatusProvider.GreenHostChangedName=[BlueGreenStatusProvider] Green host '{}' has changed its name to '{}'. BlueGreenStatusProvider.GreenTopologyChanged=[BlueGreenStatusProvider] [bgdId: '{}'] Green topology changed. BlueGreenStatusProvider.InterimStatus=[BlueGreenStatusProvider] [bgdId: '{}', role: {}] {} BlueGreenStatusProvider.NoCurrentHostInfo=[BlueGreenStatusProvider] [bgdId: '{}'] Unable to create Blue/Green monitors because information about the current host was not found. @@ -347,15 +347,15 @@ SubstituteConnectRouting.RequireIamHost=[SubstituteConnectRouting] Connecting wi SuspendConnectRouting.InProgressSuspendConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. SuspendConnectRouting.InProgressTryConnectLater=[SuspendConnectRouting] Blue/Green Deployment switchover is still in progress after {} seconds. Try to connect again later. -SuspendConnectRouting.SwitchoverCompleteContinueWithConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is completed. Continue with connect call. The call was held for {} ms. +SuspendConnectRouting.SwitchoverCompleteContinueWithConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is completed. Continue with connect call. The call was suspended for {} ms. SuspendExecuteRouting.InProgressSuspendMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is in progress. Suspend '{}' call until switchover is completed. SuspendExecuteRouting.InProgressTryMethodLater=[SuspendExecuteRouting] Blue/Green Deployment switchover is still in progress after {} ms. Try '{}' again later. -SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is completed. Continue with '{}' call. The call was held for {} ms. +SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod=[SuspendExecuteRouting] Blue/Green Deployment switchover is completed. Continue with '{}' call. The call was suspended for {} ms. -SuspendUntilCorrespondingNodeFoundConnectRouting.CompletedContinueWithConnect=[SuspendConnectUntilCorrespondingNodeFoundConnectRouting] Blue/Green Deployment status is completed. Continue with 'connect' call. The call was held for {} ms. -SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeFoundContinueWithConnect=[SuspendConnectUntilCorrespondingNodeFoundConnectRouting] The corresponding node for '{}' was found. Continue with 'connect' call. The call was held for {} ms. -SuspendUntilCorrespondingNodeFoundConnectRouting.CorrespondingNodeNotFoundTryConnectLater=[SuspendConnectUntilCorrespondingNodeFoundConnectRouting] Blue/Green Deployment switchover is still in progress and the corresponding node for '{}' was not found after {} ms. Try to connect again later. 
+SuspendUntilCorrespondingHostFoundConnectRouting.CompletedContinueWithConnect=[SuspendConnectUntilCorrespondingHostFoundConnectRouting] Blue/Green Deployment status is completed. Continue with 'connect' call. The call was suspended for {} ms. +SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostFoundContinueWithConnect=[SuspendConnectUntilCorrespondingHostFoundConnectRouting] The corresponding host for '{}' was found. Continue with 'connect' call. The call was suspended for {} ms. +SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostNotFoundTryConnectLater=[SuspendConnectUntilCorrespondingHostFoundConnectRouting] Blue/Green Deployment switchover is still in progress and the corresponding host for '{}' was not found after {} ms. Try to connect again later. Testing.CantParse=[Testing] Can't parse {}. Testing.DisabledConnectivity=[Testing] Disabled connectivity to {}. diff --git a/aws_advanced_python_wrapper/utils/properties.py b/aws_advanced_python_wrapper/utils/properties.py index 1c70788c..d56bf5c5 100644 --- a/aws_advanced_python_wrapper/utils/properties.py +++ b/aws_advanced_python_wrapper/utils/properties.py @@ -381,7 +381,7 @@ class WrapperProperties: 180_000) # 3 minutes BG_SUSPEND_NEW_BLUE_CONNECTIONS = WrapperProperty( "bg_suspend_new_blue_connections", - "Enables Blue/Green Deployment switchover to suspend new blue connection requests while the " + "Enables Blue/Green Deployment switchover to suspend new blue connection requests while the " "switchover process is in progress.", False) From 05fc6d348c24800cbe7c0acb8674bec46946d74d Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 11 Jul 2025 16:08:07 -0700 Subject: [PATCH 25/41] bug fixes wip --- .../blue_green_plugin.py | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index b9a542f7..0433b9d8 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -295,16 +295,16 @@ def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): self._bg_role = bg_role def delay(self, delay_ms: int, bg_status: Optional[BlueGreenStatus], plugin_service: PluginService, bg_id: str): - end_time_sec = time.time() + (delay_ms / 1_000) + end_time_sec = time.time() + (delay_ms // 1_000) min_delay_ms = min(delay_ms, BaseRouting._MIN_SLEEP_MS) if bg_status is None: - time.sleep(delay_ms / 1_000) + time.sleep(delay_ms // 1_000) return while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time_sec: with bg_status.cv: - bg_status.cv.wait(min_delay_ms / 1_000) + bg_status.cv.wait(min_delay_ms // 1_000) def is_match(self, host_info: Optional[HostInfo], bg_role: BlueGreenRole) -> bool: if self._endpoint is None: @@ -459,7 +459,7 @@ def apply( bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms / 1_000 + end_time_sec = start_time_sec + timeout_ms // 1_000 try: while time.time() < end_time_sec and \ @@ -475,7 +475,7 @@ def apply( logger.debug( Messages.get_formatted( "SuspendConnectRouting.SwitchoverCompleteContinueWithConnect", - (time.time() - start_time_sec) / 1000)) + (time.time() - start_time_sec) // 1000)) finally: telemetry_context.close_context() @@ -514,7 +514,7 @@ def apply( timeout_ms = 
WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms / 1_000 + end_time_sec = start_time_sec + timeout_ms // 1_000 try: while time.time() < end_time_sec and \ @@ -530,7 +530,7 @@ def apply( if bg_status is None or bg_status.phase == BlueGreenPhase.COMPLETED: logger.debug( "SuspendUntilCorrespondingHostFoundConnectRouting.CompletedContinueWithConnect", - (time.time() - start_time_sec) / 1000) + (time.time() - start_time_sec) // 1000) return None if time.time() > end_time_sec: @@ -538,13 +538,13 @@ def apply( Messages.get_formatted( "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostNotFoundTryConnectLater", host_info.host, - (time.time() - start_time_sec) / 1000)) + (time.time() - start_time_sec) // 1000)) logger.debug( Messages.get_formatted( "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostFoundContinueWithConnect", host_info.host, - (time.time() - start_time_sec) / 1000)) + (time.time() - start_time_sec) // 1000)) finally: telemetry_context.close_context() @@ -600,7 +600,7 @@ def apply( bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms / 1_000 + end_time_sec = start_time_sec + timeout_ms // 1_000 try: while time.time() < end_time_sec and \ @@ -619,7 +619,7 @@ def apply( Messages.get_formatted( "SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod", method_name, - (time.time() - start_time_sec) / 1000)) + (time.time() - start_time_sec) // 1000)) finally: telemetry_context.close_context() @@ -896,6 +896,7 @@ def _run(self): def _open_connection(self): conn = self._connection + # TODO: do we need to lock while we check the condition and start the thread if it we don't have a conn? if not self._is_connection_closed(conn): return @@ -919,7 +920,7 @@ def _open_connection_task(self): host_info = self._initial_host_info self._connected_ip_address = None ip_address = None - self._is_host_info_correct = False + self._is_host_info_correct.clear() try: if self.use_ip_address.is_set() and ip_address is not None: @@ -941,11 +942,15 @@ def _open_connection_task(self): self._panic_mode.clear() self._notify_changes() - except Exception: + except Exception as e: # Attempt to open connection failed. - self._connection = None - self._panic_mode.set() - self._notify_changes() + import traceback + print(traceback.format_exc(), flush=True) + raise e + # TODO: change back + # self._connection = None + # self._panic_mode.set() + # self._notify_changes() def _get_ip_address(self, host: str) -> ValueContainer[str]: try: @@ -1030,7 +1035,7 @@ def _collect_status(self): self._rds_utils.is_not_old_instance(status.endpoint)} self._host_names.update(current_host_names) - if not self._is_host_info_correct and status_info is not None: + if not self._is_host_info_correct.is_set() and status_info is not None: # We connected to an initial host info that might not be the desired blue or green cluster. Let's check # if we need to reconnect to the correct one. status_info_ip_address = self._get_ip_address(status_info.endpoint) @@ -1038,15 +1043,15 @@ def _collect_status(self): if connected_ip_address is not None and connected_ip_address != status_info_ip_address: # We are not connected to the desired blue or green cluster, we need to reconnect. 
self._connection_host_info = HostInfo(host=status_info.endpoint, port=status_info.port) - self._is_host_info_correct = True + self._is_host_info_correct.set() self._close_connection() self._panic_mode.set() else: # We are already connected to the right host. - self._is_host_info_correct = True + self._is_host_info_correct.set() self._panic_mode.clear() - if self._is_host_info_correct and self._host_list_provider is not None: + if self._is_host_info_correct.is_set() and self._host_list_provider is not None: # A connection to the correct cluster (blue or green) has been stablished. Let's initialize the host # list provider. self._init_host_list_provider() @@ -1068,7 +1073,7 @@ def _close_connection(self): pass def _init_host_list_provider(self): - if self._host_list_provider is not None or not self._is_host_info_correct: + if self._host_list_provider is not None or not self._is_host_info_correct.is_set(): return # We need to instantiate a separate HostListProvider with a special unique cluster ID to avoid interference with @@ -1096,7 +1101,7 @@ def _delay(self, delay_ms: int): end_ns = start_ns + delay_ms * 1_000_000 initial_interval_rate = self.interval_rate initial_panic_mode_val = self._panic_mode.is_set() - min_delay_sec = min(delay_ms, 50) / 1_000 + min_delay_sec = min(delay_ms, 50) // 1_000 while self.interval_rate == initial_interval_rate and \ perf_counter_ns() < end_ns and \ @@ -1114,11 +1119,11 @@ def collect_topology(self): return self._current_topology = self._host_list_provider.force_refresh(conn) - if self.should_collect_topology: + if self.should_collect_topology.is_set(): self._start_topology = self._current_topology current_topology_copy = self._current_topology - if current_topology_copy is not None and self.should_collect_topology: + if current_topology_copy is not None and self.should_collect_topology.is_set(): self._host_names.update({host_info.host for host_info in current_topology_copy}) def _collect_ip_addresses(self): @@ -1127,18 +1132,18 @@ def _collect_ip_addresses(self): for host in self._host_names: self._current_ip_addresses_by_host.put_if_absent(host, self._get_ip_address(host)) - if self.should_collect_ip_addresses: + if self.should_collect_ip_addresses.is_set(): self._start_ip_addresses_by_host.clear() self._start_ip_addresses_by_host.put_all(self._current_ip_addresses_by_host) def _update_ip_address_flags(self): - if self.should_collect_topology: + if self.should_collect_topology.is_set(): self._all_start_topology_ip_changed = False self._all_start_topology_endpoints_removed = False self._all_topology_changed = False return - if not self.should_collect_ip_addresses: + if not self.should_collect_ip_addresses.is_set(): # Check whether all hosts in start_topology resolve to new IP addresses self._all_start_topology_ip_changed = self._has_all_start_topology_ip_changed() @@ -1153,7 +1158,7 @@ def _update_ip_address_flags(self): ) ) - if not self.should_collect_topology: + if not self.should_collect_topology.is_set(): # Check whether all hosts in current_topology do not exist in start_topology start_topology_hosts = set() if self._start_topology is None else \ {host_info.host for host_info in self._start_topology} @@ -1280,9 +1285,9 @@ def _get_monitoring_props(self) -> Properties: monitoring_props.pop(key, None) monitoring_props.put_if_absent( - WrapperProperties.CONNECT_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_CONNECT_TIMEOUT_MS / 1_000) + WrapperProperties.CONNECT_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_CONNECT_TIMEOUT_MS // 1_000) 
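# [Editor's note, not part of the patch] The floor division introduced above
# converts a millisecond property to whole seconds, which is lossless only when
# the value is a multiple of 1_000. For quantities that feed time.sleep() or
# elapsed-time math, the same change silently truncates, which is why a later
# patch in this series ("Fix timing bugs and stale bg data bug") restores true
# division there. A minimal sketch of the hazard, assuming a hypothetical
# 100 ms delay:
#
#     delay_ms = 100
#     delay_ms / 1_000   # 0.1 -> time.sleep(0.1) sleeps for 100 ms
#     delay_ms // 1_000  # 0   -> time.sleep(0) returns immediately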
monitoring_props.put_if_absent( - WrapperProperties.SOCKET_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_SOCKET_TIMEOUT_MS / 1_000) + WrapperProperties.SOCKET_TIMEOUT_SEC.name, BlueGreenStatusProvider._DEFAULT_SOCKET_TIMEOUT_MS // 1_000) return monitoring_props def _process_interim_status(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): @@ -1792,13 +1797,13 @@ def _update_monitors(self): monitor.interval_rate = BlueGreenIntervalRate.BASELINE monitor.should_collect_ip_addresses.clear() monitor.should_collect_topology.clear() - monitor.use_ip_address = False + monitor.use_ip_address.clear() elif phase == BlueGreenPhase.CREATED: for monitor in self._monitors: monitor.interval_rate = BlueGreenIntervalRate.INCREASED monitor.should_collect_ip_addresses.set() monitor.should_collect_topology.set() - monitor.use_ip_address = False + monitor.use_ip_address.clear() if self._rollback: monitor.reset_collected_data() elif phase == BlueGreenPhase.PREPARATION \ @@ -1808,18 +1813,18 @@ def _update_monitors(self): monitor.interval_rate = BlueGreenIntervalRate.HIGH monitor.should_collect_ip_addresses.clear() monitor.should_collect_topology.clear() - monitor.use_ip_address = True + monitor.use_ip_address.set() elif phase == BlueGreenPhase.COMPLETED: for monitor in self._monitors: monitor.interval_rate = BlueGreenIntervalRate.BASELINE monitor.should_collect_ip_addresses.clear() monitor.should_collect_topology.clear() - monitor.use_ip_address = False + monitor.use_ip_address.clear() monitor.reset_collected_data() # Stop monitoring old1 cluster/instance. if not self._rollback and self._monitors[BlueGreenRole.SOURCE.value] is not None: - self._monitors[BlueGreenRole.SOURCE.value].stop = True + self._monitors[BlueGreenRole.SOURCE.value].stop.set() else: raise UnsupportedOperationError( Messages.get_formatted( From 6c32e64d99ff516d5a038751fc52c2ac4ecedefc Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 15 Jul 2025 10:30:14 -0700 Subject: [PATCH 26/41] Cleanup --- aws_advanced_python_wrapper/blue_green_plugin.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 0433b9d8..bcb9729e 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -942,15 +942,11 @@ def _open_connection_task(self): self._panic_mode.clear() self._notify_changes() - except Exception as e: + except Exception: # Attempt to open connection failed. 
- import traceback - print(traceback.format_exc(), flush=True) - raise e - # TODO: change back - # self._connection = None - # self._panic_mode.set() - # self._notify_changes() + self._connection = None + self._panic_mode.set() + self._notify_changes() def _get_ip_address(self, host: str) -> ValueContainer[str]: try: From 5cd24e0f1beda678607dac6d9e434e0586da57f7 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Wed, 16 Jul 2025 17:44:16 -0700 Subject: [PATCH 27/41] Fix timing bugs and stale bg data bug --- .../blue_green_plugin.py | 34 ++++++++----------- aws_advanced_python_wrapper/plugin_service.py | 4 +-- ...dvanced_python_wrapper_messages.properties | 1 - 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index bcb9729e..234b6dde 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -295,16 +295,16 @@ def __init__(self, endpoint: Optional[str], bg_role: Optional[BlueGreenRole]): self._bg_role = bg_role def delay(self, delay_ms: int, bg_status: Optional[BlueGreenStatus], plugin_service: PluginService, bg_id: str): - end_time_sec = time.time() + (delay_ms // 1_000) + end_time_sec = time.time() + (delay_ms / 1_000) min_delay_ms = min(delay_ms, BaseRouting._MIN_SLEEP_MS) if bg_status is None: - time.sleep(delay_ms // 1_000) + time.sleep(delay_ms / 1_000) return while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time_sec: with bg_status.cv: - bg_status.cv.wait(min_delay_ms // 1_000) + bg_status.cv.wait(min_delay_ms / 1_000) def is_match(self, host_info: Optional[HostInfo], bg_role: BlueGreenRole) -> bool: if self._endpoint is None: @@ -459,7 +459,7 @@ def apply( bg_status = plugin_service.get_status(BlueGreenStatus, self._bg_id) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms // 1_000 + end_time_sec = start_time_sec + timeout_ms / 1_000 try: while time.time() < end_time_sec and \ @@ -475,7 +475,7 @@ def apply( logger.debug( Messages.get_formatted( "SuspendConnectRouting.SwitchoverCompleteContinueWithConnect", - (time.time() - start_time_sec) // 1000)) + (time.time() - start_time_sec) * 1000)) finally: telemetry_context.close_context() @@ -514,7 +514,7 @@ def apply( timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms // 1_000 + end_time_sec = start_time_sec + timeout_ms / 1_000 try: while time.time() < end_time_sec and \ @@ -530,7 +530,7 @@ def apply( if bg_status is None or bg_status.phase == BlueGreenPhase.COMPLETED: logger.debug( "SuspendUntilCorrespondingHostFoundConnectRouting.CompletedContinueWithConnect", - (time.time() - start_time_sec) // 1000) + (time.time() - start_time_sec) * 1000) return None if time.time() > end_time_sec: @@ -538,13 +538,13 @@ def apply( Messages.get_formatted( "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostNotFoundTryConnectLater", host_info.host, - (time.time() - start_time_sec) // 1000)) + (time.time() - start_time_sec) * 1000)) logger.debug( Messages.get_formatted( "SuspendUntilCorrespondingHostFoundConnectRouting.CorrespondingHostFoundContinueWithConnect", host_info.host, - (time.time() - start_time_sec) // 1000)) + (time.time() - start_time_sec) * 1000)) finally: telemetry_context.close_context() @@ -600,7 +600,7 @@ def apply( bg_status = 
plugin_service.get_status(BlueGreenStatus, self._bg_id) timeout_ms = WrapperProperties.BG_CONNECT_TIMEOUT_MS.get_int(props) start_time_sec = time.time() - end_time_sec = start_time_sec + timeout_ms // 1_000 + end_time_sec = start_time_sec + timeout_ms / 1_000 try: while time.time() < end_time_sec and \ @@ -619,7 +619,7 @@ def apply( Messages.get_formatted( "SuspendExecuteRouting.SwitchoverCompleteContinueWithMethod", method_name, - (time.time() - start_time_sec) // 1000)) + (time.time() - start_time_sec) * 1000)) finally: telemetry_context.close_context() @@ -976,6 +976,7 @@ def _collect_status(self): return status_entries = [] + conn.autocommit = True with conn.cursor() as cursor: cursor.execute(self._bg_dialect.blue_green_status_query) for record in cursor: @@ -1048,7 +1049,7 @@ def _collect_status(self): self._panic_mode.clear() if self._is_host_info_correct.is_set() and self._host_list_provider is not None: - # A connection to the correct cluster (blue or green) has been stablished. Let's initialize the host + # A connection to the correct cluster (blue or green) has been established. Let's initialize the host # list provider. self._init_host_list_provider() except Exception as e: @@ -1097,7 +1098,7 @@ def _delay(self, delay_ms: int): end_ns = start_ns + delay_ms * 1_000_000 initial_interval_rate = self.interval_rate initial_panic_mode_val = self._panic_mode.is_set() - min_delay_sec = min(delay_ms, 50) // 1_000 + min_delay_sec = min(delay_ms, 50) / 1_000 while self.interval_rate == initial_interval_rate and \ perf_counter_ns() < end_ns and \ @@ -1245,11 +1246,6 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) return current_host_info = self._plugin_service.current_host_info - if current_host_info is None: - # TODO: raise an error instead? 
-            logger.warning("BlueGreenStatusProvider.NoCurrentHostInfo", self._bg_id)
-            return
-
         blue_monitor = BlueGreenStatusMonitor(
             BlueGreenRole.SOURCE,
             self._bg_id,
@@ -1870,7 +1866,7 @@ def _log_switchover_final_summary(self):
         sorted_phase_entries = sorted(self._phase_times_ns.items(), key=lambda entry: entry[1].timestamp_ns)
         phase_time_lines = [
             f"{entry[1].date_time:>28s} "
-            f"{'' if time_zero is None else (entry[1].timestamp_ns - time_zero.timestamp_ns) // 1_000_000:>18s} ms "
+            f"{'' if time_zero is None else (entry[1].timestamp_ns - time_zero.timestamp_ns) / 1_000_000:>18s} ms "
             f"{entry[0]:>31s}" for entry in sorted_phase_entries
         ]
         phase_times_str = "\n".join(phase_time_lines)
diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py
index 7a7043d0..37cebaac 100644
--- a/aws_advanced_python_wrapper/plugin_service.py
+++ b/aws_advanced_python_wrapper/plugin_service.py
@@ -304,7 +304,7 @@ def get_status(self, clazz: Type[StatusType], key: str) -> Optional[StatusType]:
 class PluginServiceImpl(PluginService, HostListProviderService, CanReleaseResources):
-    _STATUS_CACHE_EXPIRATION_NANO = 60 * 1_000_000_000  # one hour
+    _STATUS_CACHE_EXPIRATION_NANO = 60 * 60 * 1_000_000_000  # one hour
     _host_availability_expiring_cache: CacheMap[str, HostAvailability] = CacheMap()
     _status_cache: ClassVar[CacheMap[str, Any]] = CacheMap()
@@ -423,7 +423,7 @@ def set_current_connection(self, connection: Optional[Connection], host_info: Op
         self.session_state_service.complete()
     @property
-    def current_host_info(self) -> Optional[HostInfo]:
+    def current_host_info(self) -> HostInfo:
         if self._current_host_info is not None:
             return self._current_host_info
diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
index fc6c428d..b2416dbf 100644
--- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
+++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties
@@ -67,7 +67,6 @@ BlueGreenStatusProvider.GreenDnsRemoved=[BlueGreenStatusProvider] [bgdId: '{}']
 BlueGreenStatusProvider.GreenHostChangedName=[BlueGreenStatusProvider] Green host '{}' has changed its name to '{}'.
 BlueGreenStatusProvider.GreenTopologyChanged=[BlueGreenStatusProvider] [bgdId: '{}'] Green topology changed.
 BlueGreenStatusProvider.InterimStatus=[BlueGreenStatusProvider] [bgdId: '{}', role: {}] {}
-BlueGreenStatusProvider.NoCurrentHostInfo=[BlueGreenStatusProvider] [bgdId: '{}'] Unable to create Blue/Green monitors because information about the current host was not found.
 BlueGreenStatusProvider.ResetContext=[BlueGreenStatusProvider] Resetting context.
 BlueGreenStatusProvider.Rollback=[BlueGreenStatusProvider] [bgdId: '{}'] Blue/Green deployment is in rollback mode.
 BlueGreenStatusProvider.SwitchoverTimeout=[BlueGreenStatusProvider] Blue/Green switchover has timed out.
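[Editor's note, not part of the series] The cache-expiration fix in the patch above is easy to sanity-check because the old constant disagreed with its own comment. A minimal sketch in plain Python; the names below are invented for illustration:

    NS_PER_SEC = 1_000_000_000
    old_expiration = 60 * NS_PER_SEC       # 60 seconds, despite the "one hour" comment
    new_expiration = 60 * 60 * NS_PER_SEC  # 3_600 seconds, i.e. one hour
    assert new_expiration // NS_PER_SEC == 3_600

The same patch also corrects the elapsed-time log values: a time.time() difference is in seconds, so reporting milliseconds requires multiplying by 1_000. The previous `/ 1000` and `// 1000` versions understated the reported durations by six orders of magnitude.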
From 2e27c9dc158d59fa31ec9e6715f88e440eee9e58 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 13:40:22 -0700 Subject: [PATCH 28/41] Address PR feedback --- .../blue_green_plugin.py | 90 +++++++++++-------- aws_advanced_python_wrapper/plugin_service.py | 2 +- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 234b6dde..102f8d45 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -35,7 +35,7 @@ from dataclasses import dataclass from enum import Enum, auto from threading import Condition, Event, RLock, Thread -from typing import (Any, Callable, ClassVar, Dict, Optional, Protocol, Set, +from typing import (Any, Callable, ClassVar, Dict, Optional, Set, Tuple) from aws_advanced_python_wrapper.errors import (AwsWrapperError, @@ -69,7 +69,7 @@ class BlueGreenPhase(Enum): CREATED = (1, False) PREPARATION = (2, True) # hosts are accessible IN_PROGRESS = (3, True) # active phase; hosts are not accessible - POST = (4, True) # hosts are accessible; some change are still in progress + POST = (4, True) # hosts are accessible; some changes are still in progress COMPLETED = (5, True) # all changes are completed def __new__(cls, value: int, is_switchover_active_or_completed: bool): @@ -396,7 +396,7 @@ def apply( raise AwsWrapperError(Messages.get("SubstituteConnectRouting.RequireIamHost")) for iam_host in self._iam_hosts: - rerouted_host_info = copy(host_info) + rerouted_host_info = copy(self._substitute_host_info) rerouted_host_info.host_id = iam_host.host_id rerouted_host_info.availability = HostAvailability.AVAILABLE rerouted_host_info.add_alias(iam_host.host) @@ -425,11 +425,6 @@ def apply( "SubstituteConnectRouting.InProgressCantOpenConnection", self._substitute_host_info.url)) -class IamAuthSuccessHandler(Protocol): - def on_iam_success(self, iam_host: str): - ... 
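# [Editor's note, not part of the patch] The deletion above removes the
# single-method IamAuthSuccessHandler protocol; the surrounding routing code
# passes a bare callable (iam_auth_success_handler) instead. For illustration
# only, a plain type alias would express the same contract. The alias below is
# hypothetical and does not appear in the patch:
#
#     from typing import Callable
#     IamAuthSuccessHandler = Callable[[str], None]  # receives the IAM host name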
- - class SuspendConnectRouting(BaseRouting, ConnectRouting): _TELEMETRY_SWITCHOVER: ClassVar[str] = "Blue/Green switchover" _SLEEP_TIME_MS = 100 @@ -462,7 +457,7 @@ def apply( end_time_sec = start_time_sec + timeout_ms / 1_000 try: - while time.time() < end_time_sec and \ + while time.time() <= end_time_sec and \ bg_status is not None and \ bg_status.phase == BlueGreenPhase.IN_PROGRESS: self.delay(SuspendConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) @@ -517,10 +512,10 @@ def apply( end_time_sec = start_time_sec + timeout_ms / 1_000 try: - while time.time() < end_time_sec and \ + while time.time() <= end_time_sec and \ bg_status is not None and \ bg_status.phase != BlueGreenPhase.COMPLETED and \ - (corresponding_pair is None or corresponding_pair[1] is None): + (corresponding_pair is None or (len(corresponding_pair) > 1 and corresponding_pair[1] is None)): # wait until the corresponding host is found, or until switchover is completed self.delay( SuspendUntilCorrespondingHostFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) @@ -681,13 +676,16 @@ def connect( conn: Optional[Connection] = None while routing is not None and conn is None: conn = routing.apply(self, host_info, props, is_initial_connection, connect_func, self._plugin_service) - if conn is None: - latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) - if latest_status is not None: - self._bg_status = latest_status + if conn is not None: + break + + latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) + if latest_status is None: + self._end_time_ns.set(perf_counter_ns()) + return self._open_direct_connection(connect_func, is_initial_connection) - routing = \ - next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None) + routing = \ + next((r for r in self._bg_status.connect_routings if r.is_match(host_info, bg_role)), None) self._end_time_ns.set(perf_counter_ns()) if conn is None: @@ -740,7 +738,7 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: result: ValueContainer[Any] = ValueContainer.empty() self._start_time_ns.set(perf_counter_ns()) - while routing is not None and result is None: + while routing is not None and not result.is_present(): result = routing.apply( self, self._plugin_service, @@ -750,13 +748,16 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: execute_func, *args, **kwargs) - if not result.is_present(): - latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) - if latest_status is not None: - self._bg_status = latest_status + if result.is_present(): + break - routing = \ - next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None) + latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) + if latest_status is None: + self._end_time_ns.set(perf_counter_ns()) + return execute_func() + + routing = \ + next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None) self._end_time_ns.set(perf_counter_ns()) if result.is_present(): @@ -838,6 +839,7 @@ def __init__( self._connected_ip_address: Optional[str] = None self._is_host_info_correct = Event() self._has_started = Event() + self._open_connection_lock = RLock() db_dialect = self._plugin_service.database_dialect if not isinstance(db_dialect, BlueGreenDialect): @@ -895,22 +897,31 @@ def _run(self): logger.debug("BlueGreenStatusMonitor.ThreadCompleted", 
self._bg_role) def _open_connection(self): + if not self._is_new_conn_required(): + return + + with self._open_connection_lock: + if not self._is_new_conn_required(): + return + + self._connection = None + self._panic_mode.set() + self._open_connection_thread = \ + Thread(daemon=True, name="BlueGreenMonitorConnectionOpener", target=self._open_connection_task) + self._open_connection_thread.start() + + def _is_new_conn_required(self) -> bool: conn = self._connection - # TODO: do we need to lock while we check the condition and start the thread if it we don't have a conn? if not self._is_connection_closed(conn): - return + return False if self._open_connection_thread is not None: if self._open_connection_thread.is_alive(): - return # The task to open the connection is in progress, let's wait. + return False # The task to open the connection is in progress, let's wait. elif not self._panic_mode.is_set(): - return # The connection should be open by now since the open connection task is not running. + return False # The connection should be open by now since the open connection task is not running. - self._connection = None - self._panic_mode.set() - self._open_connection_thread = \ - Thread(daemon=True, name="BlueGreenMonitorConnectionOpener", target=self._open_connection_task) - self._open_connection_thread.start() + return True def _open_connection_task(self): host_info = self._connection_host_info @@ -997,7 +1008,6 @@ def _collect_status(self): status_entries.append(BlueGreenDbStatusInfo(version, endpoint, port, phase, bg_role)) - # Attempt to find the writer cluster status info # Attempt to find the writer cluster status info status_info = next((status for status in status_entries if self._rds_utils.is_writer_cluster_dns(status.endpoint) and @@ -1048,7 +1058,7 @@ def _collect_status(self): self._is_host_info_correct.set() self._panic_mode.clear() - if self._is_host_info_correct.is_set() and self._host_list_provider is not None: + if self._is_host_info_correct.is_set() and self._host_list_provider is None: # A connection to the correct cluster (blue or green) has been established. Let's initialize the host # list provider. self._init_host_list_provider() @@ -1171,7 +1181,7 @@ def _has_all_start_topology_ip_changed(self) -> bool: for host_info in self._start_topology: start_ip = self._start_ip_addresses_by_host.get(host_info.host) - current_ip = self._start_ip_addresses_by_host.get(host_info.host) + current_ip = self._current_ip_addresses_by_host.get(host_info.host) if start_ip is None or not start_ip.is_present() or \ current_ip is None or not current_ip.is_present(): return False @@ -1713,12 +1723,12 @@ def _get_post_status_connect_routings(self) -> List[ConnectRouting]: green_ip_host_info = copy(green_host_info) green_ip_host_info.host = green_ip_container.get() - # Check whether the green host has already been connected a non-prefixed blue IAM host name. + # Check whether the green host has already been connected to a non-prefixed blue IAM host name. if self._is_already_successfully_connected(green_host, blue_host): # Green host has already changed its name, and it's not a new non-prefixed blue host. iam_hosts: Optional[Tuple[HostInfo, ...]] = None if blue_host_info is None else (blue_host_info,) else: - # The green host has not yet changed ist name, so we need to try both possible IAM hosts. + # The green host has not yet changed its name, so we need to try both possible IAM hosts. 
iam_hosts = (green_host_info,) if blue_host_info is None else (green_host_info, blue_host_info) iam_auth_success_handler = None if is_blue_host_instance \ @@ -1825,6 +1835,8 @@ def _update_monitors(self): def _update_status_cache(self): latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) self._plugin_service.set_status(BlueGreenStatus, self._summary_status, self._bg_id) + phase = self._summary_status.phase + self._store_event_phase_time(phase.name, phase) if latest_status is not None: # Notify all waiting threads that the status has been updated. @@ -1854,7 +1866,7 @@ def _log_switchover_final_summary(self): switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \ (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED) has_active_switchover_phases = \ - any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed + any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed() for phase_info in self._phase_times_ns.values()) if not switchover_completed or not has_active_switchover_phases: diff --git a/aws_advanced_python_wrapper/plugin_service.py b/aws_advanced_python_wrapper/plugin_service.py index 37cebaac..957a801e 100644 --- a/aws_advanced_python_wrapper/plugin_service.py +++ b/aws_advanced_python_wrapper/plugin_service.py @@ -148,7 +148,7 @@ def set_current_connection(self, connection: Connection, host_info: HostInfo): @property @abstractmethod - def current_host_info(self) -> Optional[HostInfo]: + def current_host_info(self) -> HostInfo: ... @property From 28e3b5e0c05d69465d30f4343bac88f231834e4b Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 14:19:43 -0700 Subject: [PATCH 29/41] cleanup --- .../blue_green_plugin.py | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 102f8d45..f535cd74 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -839,7 +839,6 @@ def __init__( self._connected_ip_address: Optional[str] = None self._is_host_info_correct = Event() self._has_started = Event() - self._open_connection_lock = RLock() db_dialect = self._plugin_service.database_dialect if not isinstance(db_dialect, BlueGreenDialect): @@ -897,31 +896,21 @@ def _run(self): logger.debug("BlueGreenStatusMonitor.ThreadCompleted", self._bg_role) def _open_connection(self): - if not self._is_new_conn_required(): - return - - with self._open_connection_lock: - if not self._is_new_conn_required(): - return - - self._connection = None - self._panic_mode.set() - self._open_connection_thread = \ - Thread(daemon=True, name="BlueGreenMonitorConnectionOpener", target=self._open_connection_task) - self._open_connection_thread.start() - - def _is_new_conn_required(self) -> bool: conn = self._connection if not self._is_connection_closed(conn): - return False + return if self._open_connection_thread is not None: if self._open_connection_thread.is_alive(): - return False # The task to open the connection is in progress, let's wait. + return # The task to open the connection is in progress, let's wait. elif not self._panic_mode.is_set(): - return False # The connection should be open by now since the open connection task is not running. + return # The connection should be open by now since the open connection task is not running. 
- return True + self._connection = None + self._panic_mode.set() + self._open_connection_thread = \ + Thread(daemon=True, name="BlueGreenMonitorConnectionOpener", target=self._open_connection_task) + self._open_connection_thread.start() def _open_connection_task(self): host_info = self._connection_host_info From a303e27477502b4fb9217519f58a4d157b59082e Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 14:23:58 -0700 Subject: [PATCH 30/41] cleanup --- aws_advanced_python_wrapper/blue_green_plugin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index f535cd74..6b07500c 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -302,7 +302,7 @@ def delay(self, delay_ms: int, bg_status: Optional[BlueGreenStatus], plugin_serv time.sleep(delay_ms / 1_000) return - while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() < end_time_sec: + while bg_status is plugin_service.get_status(BlueGreenStatus, bg_id) and time.time() <= end_time_sec: with bg_status.cv: bg_status.cv.wait(min_delay_ms / 1_000) @@ -515,7 +515,7 @@ def apply( while time.time() <= end_time_sec and \ bg_status is not None and \ bg_status.phase != BlueGreenPhase.COMPLETED and \ - (corresponding_pair is None or (len(corresponding_pair) > 1 and corresponding_pair[1] is None)): + (corresponding_pair is None or corresponding_pair[1] is None): # wait until the corresponding host is found, or until switchover is completed self.delay( SuspendUntilCorrespondingHostFoundConnectRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) @@ -598,7 +598,7 @@ def apply( end_time_sec = start_time_sec + timeout_ms / 1_000 try: - while time.time() < end_time_sec and \ + while time.time() <= end_time_sec and \ bg_status is not None and \ bg_status.phase == BlueGreenPhase.IN_PROGRESS: self.delay(SuspendExecuteRouting._SLEEP_TIME_MS, bg_status, plugin_service, self._bg_id) @@ -1881,7 +1881,7 @@ def _reset_context_when_completed(self): switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \ (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED) has_active_switchover_phases = \ - any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed + any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed() for phase_info in self._phase_times_ns.values()) if not switchover_completed or not has_active_switchover_phases: From f09c9ad600bdc5e1c9de0399ce2c819957b0e965 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 14:30:00 -0700 Subject: [PATCH 31/41] Fix flake8 --- aws_advanced_python_wrapper/blue_green_plugin.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 6b07500c..68ed784f 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -35,8 +35,7 @@ from dataclasses import dataclass from enum import Enum, auto from threading import Condition, Event, RLock, Thread -from typing import (Any, Callable, ClassVar, Dict, Optional, Set, - Tuple) +from typing import Any, Callable, ClassVar, Dict, Optional, Set, Tuple from aws_advanced_python_wrapper.errors import (AwsWrapperError, UnsupportedOperationError) From 
c7343910893be7d7696f25b557ca5dc7bb843eb9 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 15:42:34 -0700 Subject: [PATCH 32/41] cleanup --- .../blue_green_plugin.py | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 68ed784f..e47f6fc4 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -199,7 +199,7 @@ def get_host_tuple_hash(self, current_hash: int, host_tuple: Optional[Tuple[Host if host_tuple is None or len(host_tuple) == 0: tuple_str = "" else: - tuple_str = ",".join(sorted(x.url + x.role for x in host_tuple)) + tuple_str = ",".join(sorted(f"{x.url}{x.role}" for x in host_tuple)) return self.get_value_hash(current_hash, tuple_str) @@ -890,6 +890,10 @@ def _run(self): self._delay(delay_ms) except Exception as e: logger.warning("BlueGreenStatusMonitor.MonitoringUnhandledException", self._bg_role, e) + import traceback + traceback.print_exc() + print(e) + finally: self._close_connection() logger.debug("BlueGreenStatusMonitor.ThreadCompleted", self._bg_role) @@ -1238,10 +1242,9 @@ def __init__(self, plugin_service: PluginService, props: Properties, bg_id: str) dialect = self._plugin_service.database_dialect if not isinstance(dialect, BlueGreenDialect): - # TODO: raise an error instead? Seems like we will encounter an error later if we don't raise one here. - logger.warning( - "BlueGreenStatusProvider.UnsupportedDialect", self._bg_id, dialect.__class__.__name__) - return + raise AwsWrapperError( + Messages.get_formatted( + "BlueGreenStatusProvider.UnsupportedDialect", self._bg_id, dialect.__class__.__name__)) current_host_info = self._plugin_service.current_host_info blue_monitor = BlueGreenStatusMonitor( @@ -1854,25 +1857,28 @@ def _log_switchover_final_summary(self): switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \ (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED) has_active_switchover_phases = \ - any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed() + any(phase_info.phase is not None and phase_info.phase.is_switchover_active_or_completed for phase_info in self._phase_times_ns.values()) - if not switchover_completed or not has_active_switchover_phases: - return + # if not switchover_completed or not has_active_switchover_phases: + # return time_zero_phase = BlueGreenPhase.PREPARATION if self._rollback else BlueGreenPhase.IN_PROGRESS time_zero_key = f"{time_zero_phase.name} (rollback)" if self._rollback else time_zero_phase.name time_zero = self._phase_times_ns.get(time_zero_key) sorted_phase_entries = sorted(self._phase_times_ns.items(), key=lambda entry: entry[1].timestamp_ns) - phase_time_lines = [ - f"{entry[1].date_time:>28s} " - f"{'' if time_zero is None else (entry[1].timestamp_ns - time_zero.timestamp_ns) / 1_000_000:>18s} ms " - f"{entry[0]:>31s}" for entry in sorted_phase_entries + formatted_phase_entries = [ + "{:>28s} {:>18s} ms {:>31s}".format( + str(entry[1].date_time), + "" if time_zero is None else str((entry[1].timestamp_ns - time_zero.timestamp_ns) // 1_000_000), + entry[0] + ) for entry in sorted_phase_entries ] - phase_times_str = "\n".join(phase_time_lines) + phase_times_str = "\n".join(formatted_phase_entries) divider = "----------------------------------------------------------------------------------\n" + header = "{:<28s} 
{:>21s} {:>31s}\n".format("timestamp", "time offset (ms)", "event") log_message = (f"[bg_id: '{self._bg_id}']\n{divider}" - f"{'timestamp':<28s} {'time offset (ms)':>21s} {'event':>31s}{divider}" + f"{header}{divider}" f"{phase_times_str}\n{divider}") logger.debug(log_message) @@ -1880,7 +1886,7 @@ def _reset_context_when_completed(self): switchover_completed = (not self._rollback and self._summary_status.phase == BlueGreenPhase.COMPLETED) or \ (self._rollback and self._summary_status.phase == BlueGreenPhase.CREATED) has_active_switchover_phases = \ - any(phase_info.phase is not None and phase_info.phase.is_active_switchover_completed() + any(phase_info.phase is not None and phase_info.phase.is_switchover_active_or_completed for phase_info in self._phase_times_ns.values()) if not switchover_completed or not has_active_switchover_phases: From c2c5bb55acb206d59b1fe3c2b30db413ad8d4929 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Thu, 17 Jul 2025 16:06:51 -0700 Subject: [PATCH 33/41] Uncomment commented out code --- aws_advanced_python_wrapper/blue_green_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index e47f6fc4..5f2d2966 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -1860,8 +1860,8 @@ def _log_switchover_final_summary(self): any(phase_info.phase is not None and phase_info.phase.is_switchover_active_or_completed for phase_info in self._phase_times_ns.values()) - # if not switchover_completed or not has_active_switchover_phases: - # return + if not switchover_completed or not has_active_switchover_phases: + return time_zero_phase = BlueGreenPhase.PREPARATION if self._rollback else BlueGreenPhase.IN_PROGRESS time_zero_key = f"{time_zero_phase.name} (rollback)" if self._rollback else time_zero_phase.name From 73966cd00e5345cf76459badfa689fdd938ed9f0 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 18 Jul 2025 14:43:00 -0700 Subject: [PATCH 34/41] Fix bug where variables were assigned a ValueContainer instead of the inner value of the ValueContainer --- aws_advanced_python_wrapper/blue_green_plugin.py | 4 ++-- aws_advanced_python_wrapper/utils/value_container.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index 5f2d2966..e0902ff4 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -940,7 +940,7 @@ def _open_connection_task(self): else: logger.debug("BlueGreenStatusMonitor.OpeningConnection", self._bg_role, host_info.host) self._connection = self._plugin_service.force_connect(host_info, self._props) - self._connected_ip_address = self._get_ip_address(host_info.host) + self._connected_ip_address = self._get_ip_address(host_info.host).or_else(None) logger.debug("BlueGreenStatusMonitor.OpenedConnection", self._bg_role, host_info.host) self._panic_mode.clear() @@ -1037,7 +1037,7 @@ def _collect_status(self): if not self._is_host_info_correct.is_set() and status_info is not None: # We connected to an initial host info that might not be the desired blue or green cluster. Let's check # if we need to reconnect to the correct one. 
- status_info_ip_address = self._get_ip_address(status_info.endpoint) + status_info_ip_address = self._get_ip_address(status_info.endpoint).or_else(None) connected_ip_address = self._connected_ip_address if connected_ip_address is not None and connected_ip_address != status_info_ip_address: # We are not connected to the desired blue or green cluster, we need to reconnect. diff --git a/aws_advanced_python_wrapper/utils/value_container.py b/aws_advanced_python_wrapper/utils/value_container.py index a49f8087..92f336bf 100644 --- a/aws_advanced_python_wrapper/utils/value_container.py +++ b/aws_advanced_python_wrapper/utils/value_container.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Generic, TypeVar, Union, cast +from typing import Generic, TypeVar, Union, cast, Optional V = TypeVar('V') @@ -57,7 +57,7 @@ def get(self) -> V: raise ValueError("No value present") return cast('V', self._value) - def or_else(self, other: V) -> V: + def or_else(self, other: Optional[V]) -> V: """Returns the value if present, otherwise returns other.""" return cast('V', self._value) if self.is_present() else other From 2b082c9e811e2d6d8559a34558fa718271dd37c5 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Tue, 22 Jul 2025 15:08:56 -0700 Subject: [PATCH 35/41] Fix log messages --- .../blue_green_plugin.py | 64 ++++++++++--------- aws_advanced_python_wrapper/hostinfo.py | 3 + .../utils/concurrent.py | 3 + .../utils/value_container.py | 11 +--- 4 files changed, 43 insertions(+), 38 deletions(-) diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py index e0902ff4..eb54b396 100644 --- a/aws_advanced_python_wrapper/blue_green_plugin.py +++ b/aws_advanced_python_wrapper/blue_green_plugin.py @@ -585,7 +585,7 @@ def apply( execute_func: Callable, *args: Any, **kwargs: Any) -> ValueContainer[Any]: - logger.debug("SuspendExecuteRouting.InProgressSuspendMethod") + logger.debug("SuspendExecuteRouting.InProgressSuspendMethod", method_name) telemetry_factory = plugin_service.get_telemetry_factory() telemetry_context = telemetry_factory.open_telemetry_context( @@ -735,10 +735,10 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: if routing is None: return execute_func() - result: ValueContainer[Any] = ValueContainer.empty() + result_container: ValueContainer[Any] = ValueContainer.empty() self._start_time_ns.set(perf_counter_ns()) - while routing is not None and not result.is_present(): - result = routing.apply( + while routing is not None and not result_container.is_present(): + result_container = routing.apply( self, self._plugin_service, self._props, @@ -747,7 +747,7 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: execute_func, *args, **kwargs) - if result.is_present(): + if result_container.is_present(): break latest_status = self._plugin_service.get_status(BlueGreenStatus, self._bg_id) @@ -759,8 +759,8 @@ def execute(self, target: type, method_name: str, execute_func: Callable, *args: next((r for r in self._bg_status.execute_routings if r.is_match(host_info, bg_role)), None) self._end_time_ns.set(perf_counter_ns()) - if result.is_present(): - return result.get() + if result_container.is_present(): + return result_container.get() return execute_func() finally: @@ -890,9 +890,6 @@ def _run(self): self._delay(delay_ms) except Exception as e: 
logger.warning("BlueGreenStatusMonitor.MonitoringUnhandledException", self._bg_role, e) - import traceback - traceback.print_exc() - print(e) finally: self._close_connection() @@ -1148,14 +1145,7 @@ def _update_ip_address_flags(self): # Check whether all hosts in start_topology no longer have IP addresses. This indicates that the start_topology # hosts can no longer be resolved because their DNS entries no longer exist. - self._all_start_topology_endpoints_removed = ( - bool(self._start_topology) and - all( - self._start_ip_addresses_by_host.get(host_info.host) is not None and - self._current_ip_addresses_by_host.get(host_info.host) is None - for host_info in self._start_topology - ) - ) + self._all_start_topology_endpoints_removed = self._are_all_start_endpoints_removed() if not self.should_collect_topology.is_set(): # Check whether all hosts in current_topology do not exist in start_topology @@ -1172,13 +1162,27 @@ def _has_all_start_topology_ip_changed(self) -> bool: return False for host_info in self._start_topology: - start_ip = self._start_ip_addresses_by_host.get(host_info.host) - current_ip = self._current_ip_addresses_by_host.get(host_info.host) - if start_ip is None or not start_ip.is_present() or \ - current_ip is None or not current_ip.is_present(): + start_ip_container = self._start_ip_addresses_by_host.get(host_info.host) + current_ip_container = self._current_ip_addresses_by_host.get(host_info.host) + if start_ip_container is None or not start_ip_container.is_present() or \ + current_ip_container is None or not current_ip_container.is_present(): + return False + + if start_ip_container.get() == current_ip_container.get(): return False - if start_ip.get() == current_ip.get(): + return True + + def _are_all_start_endpoints_removed(self) -> bool: + start_topology = self._start_topology + if not start_topology: + return False + + for host_info in start_topology: + start_ip_container = self._start_ip_addresses_by_host.get(host_info.host) + current_ip_container = self._current_ip_addresses_by_host.get(host_info.host) + if start_ip_container is None or current_ip_container is None or \ + not start_ip_container.is_present() or current_ip_container.is_present(): return False return True @@ -1474,21 +1478,21 @@ def _update_summary_status(self, bg_role: BlueGreenRole, interim_status: BlueGre self._summary_status = self._get_status_of_completed() else: - raise ValueError(Messages.get_formatted("bgd.unknownPhase", self._bg_id, self._latest_phase)) + raise ValueError(Messages.get_formatted("BlueGreenStatusProvider.UnknownPhase", self._bg_id, self._latest_phase)) def _update_dns_flags(self, bg_role: BlueGreenRole, interim_status: BlueGreenInterimStatus): if bg_role == BlueGreenRole.SOURCE and not self._blue_dns_update_completed and interim_status.all_start_topology_ip_changed: - logger.debug("bgd.blueDnsCompleted", self._bg_id) + logger.debug("BlueGreenStatusProvider.BlueDnsCompleted", self._bg_id) self._blue_dns_update_completed = True self._store_event_phase_time("Blue DNS updated") if bg_role == BlueGreenRole.TARGET and not self._green_dns_removed and interim_status.all_start_topology_endpoints_removed: - logger.debug("bgd.greenDnsRemoved", self._bg_id) + logger.debug("BlueGreenStatusProvider.GreenDnsRemoved", self._bg_id) self._green_dns_removed = True self._store_event_phase_time("Green DNS removed") if bg_role == BlueGreenRole.TARGET and not self._green_topology_changed and interim_status.all_topology_changed: - logger.debug("bgd.greenTopologyChanged", self._bg_id) + 
logger.debug("BlueGreenStatusProvider.GreenTopologyChanged", self._bg_id) self._green_topology_changed = True self._store_event_phase_time("Green topology changed") @@ -1552,12 +1556,12 @@ def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: continue blue_host_info = host_pair[0] - blue_ip = self._host_ip_addresses.get(blue_host_info.host) - if blue_ip is None or not blue_ip.is_present(): + blue_ip_container = self._host_ip_addresses.get(blue_host_info.host) + if blue_ip_container is None or not blue_ip_container.is_present(): blue_ip_host_info = blue_host_info else: blue_ip_host_info = copy(blue_host_info) - blue_host_info.host = blue_ip.get() + blue_host_info.host = blue_ip_container.get() host_routing = SubstituteConnectRouting(blue_ip_host_info, host, role, (blue_host_info,)) interim_status = self._interim_statuses[role.value] diff --git a/aws_advanced_python_wrapper/hostinfo.py b/aws_advanced_python_wrapper/hostinfo.py index a6313abd..7f151350 100644 --- a/aws_advanced_python_wrapper/hostinfo.py +++ b/aws_advanced_python_wrapper/hostinfo.py @@ -72,6 +72,9 @@ def __eq__(self, other: object): def __str__(self): return f"HostInfo({self.host}, {self.port}, {self.role}, {self.availability})" + def __repr__(self): + return f"HostInfo({self.host}, {self.port}, {self.role}, {self.availability})" + def __copy__(self): return HostInfo( host=self.host, diff --git a/aws_advanced_python_wrapper/utils/concurrent.py b/aws_advanced_python_wrapper/utils/concurrent.py index 17d6fd8b..04836932 100644 --- a/aws_advanced_python_wrapper/utils/concurrent.py +++ b/aws_advanced_python_wrapper/utils/concurrent.py @@ -40,6 +40,9 @@ def __contains__(self, key): def __str__(self): return f"ConcurrentDict{str(self._dict)}" + def __repr__(self): + return f"ConcurrentDict{str(self._dict)}" + def get(self, key: K, default_value: Optional[V] = None) -> Optional[V]: return self._dict.get(key, default_value) diff --git a/aws_advanced_python_wrapper/utils/value_container.py b/aws_advanced_python_wrapper/utils/value_container.py index 92f336bf..3cd04789 100644 --- a/aws_advanced_python_wrapper/utils/value_container.py +++ b/aws_advanced_python_wrapper/utils/value_container.py @@ -33,36 +33,29 @@ def __init__(self, value: Union[Empty, V] = _EMPTY): @classmethod def of(cls, value: V) -> 'ValueContainer[V]': - """Returns a ValueContainer with the specified non-None value.""" if value is None: raise ValueError("Value cannot be None") return cls(value) @classmethod def empty(cls) -> 'ValueContainer[V]': - """Returns an empty ValueContainer instance.""" return cls() def is_present(self) -> bool: - """Returns true if a value is present.""" return self._value is not self._EMPTY def is_empty(self) -> bool: - """Returns true if no value is present.""" return self._value is self._EMPTY def get(self) -> V: - """Returns the value if present, otherwise raises ValueError.""" if self._value is self._EMPTY: raise ValueError("No value present") return cast('V', self._value) def or_else(self, other: Optional[V]) -> V: - """Returns the value if present, otherwise returns other.""" return cast('V', self._value) if self.is_present() else other def __eq__(self, other: object) -> bool: - """Checks if this ValueContainer is equal to another object.""" if not isinstance(other, ValueContainer): return False if self.is_empty() and other.is_empty(): @@ -72,5 +65,7 @@ def __eq__(self, other: object) -> bool: return self._value == other._value def __str__(self) -> str: - """Returns a string representation of this ValueContainer.""" 
+ return "ValueContainer.empty" if self.is_empty() else f"ValueContainer[{self._value}]" + + def __repr__(self) -> str: return "ValueContainer.empty" if self.is_empty() else f"ValueContainer[{self._value}]" From 680190d74d6fe78f4665b748343dd150636f8dba Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Wed, 23 Jul 2025 17:32:12 -0700 Subject: [PATCH 36/41] Update multi-az dialect names --- .../database_dialect.py | 38 +++++++++---------- tests/unit/test_dialect.py | 4 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py index 33a91dbe..de9ffa43 100644 --- a/aws_advanced_python_wrapper/database_dialect.py +++ b/aws_advanced_python_wrapper/database_dialect.py @@ -53,12 +53,12 @@ class DialectCode(Enum): # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/multi-az-db-clusters-concepts.html - MULTI_AZ_MYSQL = "multi-az-mysql" + MULTI_AZ_CLUSTER_MYSQL = "multi-az-mysql" AURORA_MYSQL = "aurora-mysql" RDS_MYSQL = "rds-mysql" MYSQL = "mysql" - MULTI_AZ_PG = "multi-az-pg" + MULTI_AZ_CLUSTER_PG = "multi-az-pg" AURORA_PG = "aurora-pg" RDS_PG = "rds-pg" PG = "pg" @@ -168,7 +168,7 @@ def query_for_dialect(self, url: str, host_info: Optional[HostInfo], conn: Conne class MysqlDatabaseDialect(DatabaseDialect): _DIALECT_UPDATE_CANDIDATES: Tuple[DialectCode, ...] = ( - DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_MYSQL, DialectCode.RDS_MYSQL) + DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_CLUSTER_MYSQL, DialectCode.RDS_MYSQL) _exception_handler: Optional[ExceptionHandler] = None @property @@ -219,7 +219,7 @@ def prepare_conn_props(self, props: Properties): class PgDatabaseDialect(DatabaseDialect): _DIALECT_UPDATE_CANDIDATES: Tuple[DialectCode, ...] = ( - DialectCode.AURORA_PG, DialectCode.MULTI_AZ_PG, DialectCode.RDS_PG) + DialectCode.AURORA_PG, DialectCode.MULTI_AZ_CLUSTER_PG, DialectCode.RDS_PG) _exception_handler: Optional[ExceptionHandler] = None @property @@ -277,7 +277,7 @@ def is_blue_green_status_available(self, conn: Connection) -> bool: class RdsMysqlDialect(MysqlDatabaseDialect, BlueGreenDialect): - _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_MYSQL) + _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_MYSQL, DialectCode.MULTI_AZ_CLUSTER_MYSQL) _BG_STATUS_QUERY = "SELECT version, endpoint, port, role, status FROM mysql.rds_topology" _BG_STATUS_EXISTS_QUERY = \ @@ -331,7 +331,7 @@ class RdsPgDialect(PgDatabaseDialect, BlueGreenDialect): "(setting LIKE '%aurora_stat_utils%') AS aurora_stat_utils " "FROM pg_settings " "WHERE name='rds.extensions'") - _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_PG, DialectCode.MULTI_AZ_PG) + _DIALECT_UPDATE_CANDIDATES = (DialectCode.AURORA_PG, DialectCode.MULTI_AZ_CLUSTER_PG) _BG_STATUS_QUERY = (f"SELECT version, endpoint, port, role, status " f"FROM rds_tools.show_topology('aws_advanced_python_wrapper-{DriverInfo.DRIVER_VERSION}')") @@ -376,7 +376,7 @@ def is_blue_green_status_available(self, conn: Connection) -> bool: class AuroraMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect, BlueGreenDialect): - _DIALECT_UPDATE_CANDIDATES = (DialectCode.MULTI_AZ_MYSQL,) + _DIALECT_UPDATE_CANDIDATES = (DialectCode.MULTI_AZ_CLUSTER_MYSQL,) _TOPOLOGY_QUERY = ("SELECT SERVER_ID, CASE WHEN SESSION_ID = 'MASTER_SESSION_ID' THEN TRUE ELSE FALSE END, " "CPU, REPLICA_LAG_IN_MILLISECONDS, LAST_UPDATE_TIMESTAMP " "FROM information_schema.replica_host_status " @@ -424,7 +424,7 @@ def is_blue_green_status_available(self, 
 
 
 class AuroraPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect, AuroraLimitlessDialect, BlueGreenDialect):
-    _DIALECT_UPDATE_CANDIDATES: Tuple[DialectCode, ...] = (DialectCode.MULTI_AZ_PG,)
+    _DIALECT_UPDATE_CANDIDATES: Tuple[DialectCode, ...] = (DialectCode.MULTI_AZ_CLUSTER_PG,)
 
     _EXTENSIONS_QUERY = "SELECT (setting LIKE '%aurora_stat_utils%') AS aurora_stat_utils " \
                         "FROM pg_settings WHERE name='rds.extensions'"
@@ -495,7 +495,7 @@ def is_blue_green_status_available(self, conn: Connection) -> bool:
         return False
 
 
-class MultiAzMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect):
+class MultiAzClusterMysqlDialect(MysqlDatabaseDialect, TopologyAwareDatabaseDialect):
     _TOPOLOGY_QUERY = "SELECT id, endpoint, port FROM mysql.rds_topology"
     _WRITER_HOST_QUERY = "SHOW REPLICA STATUS"
     _WRITER_HOST_COLUMN_INDEX = 39
@@ -510,7 +510,7 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool:
         initial_transaction_status: bool = driver_dialect.is_in_transaction(conn)
         try:
             with closing(conn.cursor()) as cursor:
-                cursor.execute(MultiAzMysqlDialect._TOPOLOGY_QUERY)
+                cursor.execute(MultiAzClusterMysqlDialect._TOPOLOGY_QUERY)
                 records = cursor.fetchall()
                 if not records:
                     return False
@@ -552,7 +552,7 @@ def prepare_conn_props(self, props: Properties):
             props["conn_attrs"].update(extra_conn_attrs)
 
 
-class MultiAzPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect):
+class MultiAzClusterPgDialect(PgDatabaseDialect, TopologyAwareDatabaseDialect):
     # The driver name passed to show_topology is used for RDS metrics purposes.
     # It is not required for functional correctness.
     _TOPOLOGY_QUERY = \
@@ -569,16 +569,16 @@ def dialect_update_candidates(self) -> Optional[Tuple[DialectCode, ...]]:
 
     @property
     def exception_handler(self) -> Optional[ExceptionHandler]:
-        if MultiAzPgDialect._exception_handler is None:
-            MultiAzPgDialect._exception_handler = Utils.initialize_class(
+        if MultiAzClusterPgDialect._exception_handler is None:
+            MultiAzClusterPgDialect._exception_handler = Utils.initialize_class(
                 "aws_advanced_python_wrapper.utils.pg_exception_handler.MultiAzPgExceptionHandler")
-        return MultiAzPgDialect._exception_handler
+        return MultiAzClusterPgDialect._exception_handler
 
     def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool:
         initial_transaction_status: bool = driver_dialect.is_in_transaction(conn)
         try:
             with closing(conn.cursor()) as cursor:
-                cursor.execute(MultiAzPgDialect._WRITER_HOST_QUERY)
+                cursor.execute(MultiAzClusterPgDialect._WRITER_HOST_QUERY)
                 if cursor.fetchone() is not None:
                     return True
         except Exception:
@@ -605,8 +605,8 @@ class UnknownDatabaseDialect(DatabaseDialect):
             DialectCode.RDS_PG,
             DialectCode.AURORA_MYSQL,
             DialectCode.AURORA_PG,
-            DialectCode.MULTI_AZ_MYSQL,
-            DialectCode.MULTI_AZ_PG)
+            DialectCode.MULTI_AZ_CLUSTER_MYSQL,
+            DialectCode.MULTI_AZ_CLUSTER_PG)
 
     @property
     def default_port(self) -> int:
@@ -647,11 +647,11 @@ class DatabaseDialectManager(DatabaseDialectProvider):
         DialectCode.MYSQL: MysqlDatabaseDialect(),
         DialectCode.RDS_MYSQL: RdsMysqlDialect(),
         DialectCode.AURORA_MYSQL: AuroraMysqlDialect(),
-        DialectCode.MULTI_AZ_MYSQL: MultiAzMysqlDialect(),
+        DialectCode.MULTI_AZ_CLUSTER_MYSQL: MultiAzClusterMysqlDialect(),
         DialectCode.PG: PgDatabaseDialect(),
         DialectCode.RDS_PG: RdsPgDialect(),
         DialectCode.AURORA_PG: AuroraPgDialect(),
-        DialectCode.MULTI_AZ_PG: MultiAzPgDialect(),
+        DialectCode.MULTI_AZ_CLUSTER_PG: MultiAzClusterPgDialect(),
         DialectCode.UNKNOWN: UnknownDatabaseDialect()
     }
 
diff --git a/tests/unit/test_dialect.py b/tests/unit/test_dialect.py
index 34540893..fb7f658c 100644
--- a/tests/unit/test_dialect.py
+++ b/tests/unit/test_dialect.py
@@ -19,7 +19,7 @@
 from aws_advanced_python_wrapper.database_dialect import (
     AuroraMysqlDialect, AuroraPgDialect, DatabaseDialectManager, DialectCode,
-    MultiAzMysqlDialect, MysqlDatabaseDialect, PgDatabaseDialect,
+    MultiAzClusterMysqlDialect, MysqlDatabaseDialect, PgDatabaseDialect,
     RdsMysqlDialect, RdsPgDialect, TargetDriverType, UnknownDatabaseDialect)
 from aws_advanced_python_wrapper.driver_info import DriverInfo
 from aws_advanced_python_wrapper.errors import AwsWrapperError
@@ -216,7 +216,7 @@ def test_get_dialect_user_setting(mock_driver_dialect):
 
 
 def test_prepare_conn_props__multi_az_mysql():
-    dialect = MultiAzMysqlDialect()
+    dialect = MultiAzClusterMysqlDialect()
     props = Properties({"host": "some_host"})
     expected = Properties({
         "host": "some_host",

From 418758a062ca4cfec6abde2310e66e78a7002122 Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Wed, 23 Jul 2025 17:33:23 -0700
Subject: [PATCH 37/41] Fix bug where TARGET's IP maps contained the original
 URL instead of just the green URL

---
 aws_advanced_python_wrapper/blue_green_plugin.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py
index eb54b396..94d6fdaa 100644
--- a/aws_advanced_python_wrapper/blue_green_plugin.py
+++ b/aws_advanced_python_wrapper/blue_green_plugin.py
@@ -1039,6 +1039,7 @@ def _collect_status(self):
             if connected_ip_address is not None and connected_ip_address != status_info_ip_address:
                 # We are not connected to the desired blue or green cluster, we need to reconnect.
                 self._connection_host_info = HostInfo(host=status_info.endpoint, port=status_info.port)
+                self._props["host"] = status_info.endpoint
                 self._is_host_info_correct.set()
                 self._close_connection()
                 self._panic_mode.set()

From 2cadaac636fb68f8f030f9bda7dd568598466b73 Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Thu, 24 Jul 2025 16:53:43 -0700
Subject: [PATCH 38/41] Fix PG bug: cannot switch autocommit inside transaction

---
 aws_advanced_python_wrapper/blue_green_plugin.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/aws_advanced_python_wrapper/blue_green_plugin.py b/aws_advanced_python_wrapper/blue_green_plugin.py
index 94d6fdaa..aeed76e5 100644
--- a/aws_advanced_python_wrapper/blue_green_plugin.py
+++ b/aws_advanced_python_wrapper/blue_green_plugin.py
@@ -806,6 +806,9 @@ def __init__(
         self._bg_id = bg_id
         self._initial_host_info = initial_host_info
         self._plugin_service = plugin_service
+
+        # autocommit is False by default. When False, the BG status query may return stale data, so we set it to True.
+ props["autocommit"] = True self._props = props self._status_check_intervals_ms = status_check_intervals_ms self._interim_status_processor = interim_status_processor @@ -976,7 +979,6 @@ def _collect_status(self): return status_entries = [] - conn.autocommit = True with conn.cursor() as cursor: cursor.execute(self._bg_dialect.blue_green_status_query) for record in cursor: @@ -1562,7 +1564,7 @@ def _get_blue_ip_address_connect_routings(self) -> List[ConnectRouting]: blue_ip_host_info = blue_host_info else: blue_ip_host_info = copy(blue_host_info) - blue_host_info.host = blue_ip_container.get() + blue_ip_host_info.host = blue_ip_container.get() host_routing = SubstituteConnectRouting(blue_ip_host_info, host, role, (blue_host_info,)) interim_status = self._interim_statuses[role.value] From 53544b5238a93d74ef45d6c3830d12abe650eb3d Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 25 Jul 2025 09:39:29 -0700 Subject: [PATCH 39/41] Cleanup --- .../resources/aws_advanced_python_wrapper_messages.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties index 5cbf538d..9d690871 100644 --- a/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties +++ b/aws_advanced_python_wrapper/resources/aws_advanced_python_wrapper_messages.properties @@ -379,7 +379,7 @@ StaleDnsPlugin.RequireDynamicProvider=[StaleDnsPlugin] A dynamic host list provi SubstituteConnectRouting.InProgressCantOpenConnection=[SubstituteConnectRouting] Blue/Green Deployment switchover is in progress. Can't establish connection to '{}'. SubstituteConnectRouting.RequireIamHost=[SubstituteConnectRouting] Connecting with IP address when IAM authentication is enabled requires an 'iamHost' parameter. -SuspendConnectRouting.InProgressSuspendConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. +SuspendConnectRouting.InProgressSuspendConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is in progress. The 'connect' call will be delayed until switchover is completed. SuspendConnectRouting.InProgressTryConnectLater=[SuspendConnectRouting] Blue/Green Deployment switchover is still in progress after {} seconds. Try to connect again later. SuspendConnectRouting.SwitchoverCompleteContinueWithConnect=[SuspendConnectRouting] Blue/Green Deployment switchover is completed. Continue with connect call. The call was suspended for {} ms. From 7275e7af4af6d1ccd0ab08d8f3e2694ccddeef27 Mon Sep 17 00:00:00 2001 From: aaron-congo Date: Fri, 25 Jul 2025 10:40:37 -0700 Subject: [PATCH 40/41] Fix mypy --- aws_advanced_python_wrapper/utils/value_container.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_advanced_python_wrapper/utils/value_container.py b/aws_advanced_python_wrapper/utils/value_container.py index 3cd04789..092194ba 100644 --- a/aws_advanced_python_wrapper/utils/value_container.py +++ b/aws_advanced_python_wrapper/utils/value_container.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Generic, TypeVar, Union, cast, Optional
+from typing import Generic, Optional, TypeVar, Union, cast
 
 V = TypeVar('V')
 
@@ -52,7 +52,7 @@ def get(self) -> V:
             raise ValueError("No value present")
         return cast('V', self._value)
 
-    def or_else(self, other: Optional[V]) -> V:
+    def or_else(self, other: Optional[V]) -> Optional[V]:
         return cast('V', self._value) if self.is_present() else other
 
     def __eq__(self, other: object) -> bool:

From f691da581e34822a332643a30b0b203c3f4ead74 Mon Sep 17 00:00:00 2001
From: aaron-congo
Date: Fri, 25 Jul 2025 12:50:54 -0700
Subject: [PATCH 41/41] fix: dialect selection for PG multi-az instance

---
 aws_advanced_python_wrapper/database_dialect.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aws_advanced_python_wrapper/database_dialect.py b/aws_advanced_python_wrapper/database_dialect.py
index de9ffa43..2249116d 100644
--- a/aws_advanced_python_wrapper/database_dialect.py
+++ b/aws_advanced_python_wrapper/database_dialect.py
@@ -579,7 +579,8 @@ def is_dialect(self, conn: Connection, driver_dialect: DriverDialect) -> bool:
         try:
             with closing(conn.cursor()) as cursor:
                 cursor.execute(MultiAzClusterPgDialect._WRITER_HOST_QUERY)
-                if cursor.fetchone() is not None:
+                record = cursor.fetchone()
+                if record is not None and len(record) > 0 and bool(record[0]):
                     return True
         except Exception:
             if not initial_transaction_status and driver_dialect.is_in_transaction(conn):
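
The ValueContainer changes in patches 34 and 40 are easiest to follow in isolation. A minimal usage sketch of the class as it stands after patch 40 (the IP strings are made up):

    from aws_advanced_python_wrapper.utils.value_container import ValueContainer

    # A successful DNS lookup wraps its result; or_else unwraps it.
    resolved: ValueContainer[str] = ValueContainer.of("10.0.1.23")
    assert resolved.or_else(None) == "10.0.1.23"

    # An empty container falls back to the supplied default. Since the default
    # may be None, patch 40 widens the signature to
    # or_else(self, other: Optional[V]) -> Optional[V].
    unresolved: ValueContainer[str] = ValueContainer.empty()
    assert unresolved.or_else(None) is None

    # The bug patch 34 fixes: storing the container where a plain value belongs.
    connected_ip = resolved                # wrong: a ValueContainer[str]
    connected_ip = resolved.or_else(None)  # right: a str (or None)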
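
Patch 38's autocommit fix moves the setting from the live monitoring connection into the connect-time properties, because a PG driver may refuse to toggle autocommit while a transaction is open. A sketch of the resulting pattern (the endpoint name is illustrative):

    from aws_advanced_python_wrapper.utils.properties import Properties

    # Before: _collect_status() ran `conn.autocommit = True` on an already-open
    # connection, which can raise mid-transaction on PostgreSQL drivers.
    # After: the monitor requests autocommit in its connect properties, so every
    # status connection is created with it already enabled and the blue/green
    # status query never reads stale data.
    props = Properties({"host": "green-endpoint.example.com"})
    props["autocommit"] = True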
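
Patch 38 also corrects a copy-then-mutate slip in _get_blue_ip_address_connect_routings: the code copied blue_host_info but then assigned the IP address to the original instead of the copy. The hazard in miniature (host and IP values are hypothetical):

    from copy import copy

    from aws_advanced_python_wrapper.hostinfo import HostInfo

    blue_host_info = HostInfo(host="blue.cluster.example.com", port=5432)
    blue_ip_host_info = copy(blue_host_info)

    blue_ip_host_info.host = "10.0.1.23"  # right: rewrite only the copy
    # blue_host_info.host = "10.0.1.23"   # wrong: clobbers the hostname that
    #                                     # SubstituteConnectRouting still needs

    assert blue_host_info.host == "blue.cluster.example.com"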