Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-2289] [DPE-2388] Upgrade from 14/stable and add integration tests #210

Merged
merged 44 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
fc11c14
Added initial upgrade implementation
marceloneppel Jul 12, 2023
6e64a14
Merge remote-tracking branch 'origin/main' into dpe-1769-minor-versio…
marceloneppel Jul 12, 2023
73e5ce4
Minor fixes
marceloneppel Jul 12, 2023
a332b45
Adjusted code to correctly update the upgrade stack and speed up the …
marceloneppel Jul 14, 2023
77aeeee
Minor fixes
marceloneppel Jul 17, 2023
018b714
Merge remote-tracking branch 'origin/main' into dpe-1769-minor-versio…
marceloneppel Jul 19, 2023
d9d7bb0
Updated the code with the new library
marceloneppel Jul 21, 2023
34c889a
Fixed upgrade logic to switchover to unit zero and added unit tests
marceloneppel Jul 24, 2023
3146cac
Added backup creation check
marceloneppel Jul 24, 2023
78539b6
Rollback dependencies
marceloneppel Jul 25, 2023
1bc2a38
Fixed dependencies
marceloneppel Jul 25, 2023
55c4d08
Minor fixes
marceloneppel Jul 25, 2023
2da5417
Merge remote-tracking branch 'origin/main' into dpe-1769-minor-versio…
marceloneppel Jul 25, 2023
2981b13
Final fixes
marceloneppel Jul 25, 2023
c249b44
Removed upgrade integration tests
marceloneppel Jul 25, 2023
1cd613f
Pin charmcraft revision
marceloneppel Jul 25, 2023
4972e40
Remove charmcraft pin
marceloneppel Jul 25, 2023
b2aa567
Removed hashes from requirements
marceloneppel Jul 25, 2023
b35d885
Revert "Removed upgrade integration tests"
marceloneppel Jul 25, 2023
1fe57bc
Renamed overriden method
marceloneppel Jul 26, 2023
7f18b62
Merge branch 'dpe-1769-minor-version-upgrade' into dpe-2289-upgrade-i…
marceloneppel Jul 26, 2023
d959c0a
Upgraded test
marceloneppel Jul 28, 2023
1f246e2
Added replication health check and rock dependency
marceloneppel Jul 28, 2023
e8c135b
Merge remote-tracking branch 'origin/dpe-1769-minor-version-upgrade' …
marceloneppel Aug 4, 2023
cb51a18
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Aug 9, 2023
112f4d6
Comment test
marceloneppel Sep 29, 2023
234fc31
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Sep 29, 2023
1215d1c
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Oct 4, 2023
e8fab1c
Add additional upgrade logic for stable revision
marceloneppel Oct 9, 2023
bf63c23
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Oct 9, 2023
650afe4
Fix partially the upgrade from stable
marceloneppel Oct 9, 2023
ab3c7b6
Fix exporter startup
marceloneppel Oct 9, 2023
09d4c55
Fix metrics service start
marceloneppel Oct 9, 2023
a4cd98c
Revert tests
marceloneppel Oct 9, 2023
99563fe
Format
marceloneppel Oct 9, 2023
c750f90
Lint
marceloneppel Oct 10, 2023
d6553e5
Add test for upgrade from stable
marceloneppel Oct 11, 2023
782cb8a
Add upgrade test
marceloneppel Oct 11, 2023
087b033
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Oct 11, 2023
30d257e
Minor fixes
marceloneppel Oct 11, 2023
3ca3ffb
Fix unit tests
marceloneppel Oct 11, 2023
ce618ab
Fix unit tests patches
marceloneppel Oct 11, 2023
1459b3e
Merge remote-tracking branch 'origin/main' into dpe-2289-upgrade-inte…
marceloneppel Oct 13, 2023
7c8bed5
PR feedback
marceloneppel Oct 13, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ jobs:
- password-rotation-integration
- plugins-integration
- tls-integration
- upgrade-integration
- upgrade-from-stable-integration
agent-versions:
- "2.9.45" # renovate: latest juju 2
- "3.1.5" # renovate: latest juju 3
Expand Down
30 changes: 24 additions & 6 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
MaintenanceStatus,
Relation,
SecretNotFoundError,
Unit,
WaitingStatus,
)
from ops.pebble import ChangeError, Layer, PathError, ProtocolError, ServiceStatus
Expand Down Expand Up @@ -163,6 +164,14 @@ def _generate_metrics_jobs(self, enable_tls: bool) -> Dict:
},
]

@property
def app_units(self) -> set[Unit]:
    """Return this unit together with every unit of the peer relation.

    Returns:
        An empty set while ``self._peers`` is falsy; otherwise a set made of
        ``self.unit`` and the members of ``self._peers.units``.
    """
    peers = self._peers
    if peers:
        # The local unit is added explicitly alongside the units listed
        # on the peer relation.
        return {self.unit, *peers.units}
    return set()

@property
def app_peer_data(self) -> Dict:
"""Application peer relation data object."""
Expand Down Expand Up @@ -541,21 +550,21 @@ def enable_disable_extensions(self, database: str = None) -> None:
Args:
database: optional database where to enable/disable the extension.
"""
orginial_status = self.unit.status
marceloneppel marked this conversation as resolved.
Show resolved Hide resolved
original_status = self.unit.status
for plugin in self.config.plugin_keys():
enable = self.config[plugin]
# Enable or disable the plugin/extension.
extension = "_".join(plugin.split("_")[1:-1])
self.unit.status = WaitingStatus(
f"{'Enabling' if enable else 'Disabling'} {extension}"
)
try:
self.unit.status = WaitingStatus(
f"{'Enabling' if enable else 'Disabling'} {extension}"
)
self.postgresql.enable_disable_extension(extension, enable, database)
self.unit.status = orginial_status
except PostgreSQLEnableDisableExtensionError as e:
logger.exception(
f"failed to {'enable' if enable else 'disable'} {extension} plugin: %s", str(e)
)
self.unit.status = original_status

def _add_members(self, event) -> None:
"""Add new cluster members.
Expand Down Expand Up @@ -1046,6 +1055,10 @@ def _on_stop(self, _):

def _on_update_status(self, _) -> None:
"""Update the unit status message."""
if not self.upgrade.idle:
logger.debug("Early exit on_update_status: upgrade in progress")
return

container = self.unit.get_container("postgresql")
if not container.can_connect():
logger.debug("on_update_status early exit: Cannot connect to container")
Expand Down Expand Up @@ -1204,7 +1217,11 @@ def _generate_metrics_service(self) -> Dict:
"override": "replace",
"summary": "postgresql metrics exporter",
"command": "/start-exporter.sh",
"startup": "enabled",
"startup": (
"enabled"
if self.get_secret("app", MONITORING_PASSWORD_KEY) is not None
else "disabled"
),
"after": [self._postgresql_service],
"user": WORKLOAD_OS_USER,
"group": WORKLOAD_OS_GROUP,
Expand Down Expand Up @@ -1415,6 +1432,7 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
container.add_layer(
self._metrics_service,
Layer({"services": {self._metrics_service: self._generate_metrics_service()}}),
combine=True,
)
container.restart(self._metrics_service)

Expand Down
54 changes: 51 additions & 3 deletions src/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
from lightkube.core.client import Client
from lightkube.core.exceptions import ApiError
from lightkube.resources.apps_v1 import StatefulSet
from ops.charm import WorkloadEvent
from ops.model import BlockedStatus
from ops.charm import UpgradeCharmEvent, WorkloadEvent
from ops.model import BlockedStatus, MaintenanceStatus, RelationDataContent
from pydantic import BaseModel
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed
from typing_extensions import override

from constants import APP_SCOPE, MONITORING_PASSWORD_KEY, MONITORING_USER
from patroni import SwitchoverFailedError
from utils import new_password

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -51,6 +53,7 @@ def __init__(self, charm, model: BaseModel, **kwargs) -> None:
self.framework.observe(
getattr(self.charm.on, "postgresql_pebble_ready"), self._on_postgresql_pebble_ready
)
self.framework.observe(self.charm.on.upgrade_charm, self._on_upgrade_charm_check_legacy)

@property
def is_no_sync_member(self) -> bool:
Expand Down Expand Up @@ -80,7 +83,7 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
event.defer()
return

if self.peer_relation.data[self.charm.unit].get("state") != "upgrading":
if self.state not in ["upgrading", "recovery"]:
return

# Don't mark the upgrade of this unit as completed until Patroni reports the
Expand All @@ -90,6 +93,15 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
event.defer()
return

if self.charm.unit.is_leader():
if not self.charm._patroni.primary_endpoint_ready:
logger.debug(
"Deferring on_pebble_ready: current unit is leader but primary endpoint is not ready yet"
)
event.defer()
return
self._set_up_new_credentials_for_legacy()

try:
for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(10)):
with attempt:
Expand Down Expand Up @@ -120,6 +132,25 @@ def _on_upgrade_changed(self, _) -> None:

self.charm.update_config()

# Handler observed on the charm's upgrade_charm event (registered in __init__).
# When no upgrade state has been recorded yet, it flags this unit as "upgrading".
# The `event` argument is not used by the body.
def _on_upgrade_charm_check_legacy(self, event: UpgradeCharmEvent) -> None:
# Without the peer relation there is nowhere to record the state; only log and exit.
if not self.peer_relation:
logger.debug("Wait all units join the upgrade relation")
return

# `self.state` is defined outside this view — presumably the upgrade state
# already stored for this unit; TODO confirm.
if self.state:
# Do nothing - if state set, upgrade is supported
return

logger.warning("Upgrading from unspecified version")

# All peers should set the state to upgrading.
# unit_upgrade_data resolves to self.peer_relation.data[self.charm.unit].
self.unit_upgrade_data.update({"state": "upgrading"})

# The comparison target is "<app name>/<planned_units - 1>", i.e. presumably the
# highest-numbered unit; every other unit takes this branch.
# NOTE(review): indentation is not visible in this view, so it cannot be confirmed
# here whether all three statements below belong to this `if` — verify.
if self.charm.unit.name != f"{self.charm.app.name}/{self.charm.app.planned_units() - 1}":
self.charm.unit.status = MaintenanceStatus("upgrading unit")
# NOTE(review): writes the same key/value to the same databag as the
# unit_upgrade_data.update(...) call above — looks redundant; confirm.
self.peer_relation.data[self.charm.unit].update({"state": "upgrading"})
# NOTE(review): presumably a partition equal to the planned unit count keeps
# any pod from being rolled yet — confirm against _set_rolling_update_partition.
self._set_rolling_update_partition(self.charm.app.planned_units())

@override
def pre_upgrade_check(self) -> None:
"""Runs necessary checks validating the cluster is in a healthy state to upgrade.
Expand Down Expand Up @@ -220,3 +251,20 @@ def _set_first_rolling_update_partition(self) -> None:
self._set_rolling_update_partition(self.charm.app.planned_units() - 1)
except KubernetesClientError as e:
raise ClusterNotReadyError(e.message, e.cause)

# Invoked from _on_postgresql_pebble_ready; the call appears to sit on the
# leader-only path, after the primary endpoint readiness check.
def _set_up_new_credentials_for_legacy(self) -> None:
"""Create missing password and user."""
# Generate and store a monitoring password only when the app-scoped secret is absent.
if self.charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY) is None:
self.charm.set_secret(APP_SCOPE, MONITORING_PASSWORD_KEY, new_password())
# NOTE(review): indentation is not visible in this view; whether the user lookup
# below is nested under the secret check above cannot be confirmed here — verify.
users = self.charm.postgresql.list_users()
# Create the monitoring user only if it is not already listed, using the stored
# password and granting the pg_monitor role.
if MONITORING_USER not in users:
self.charm.postgresql.create_user(
MONITORING_USER,
self.charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
extra_user_roles="pg_monitor",
)

@property
def unit_upgrade_data(self) -> RelationDataContent:
    """Return this unit's data from the peer relation.

    The value is ``self.peer_relation.data[self.charm.unit]``: the entry keyed
    by this charm's own unit, not by the application.
    """
    # No guard here: callers are expected to have checked that
    # ``peer_relation`` is set before reading this property.
    return self.peer_relation.data[self.charm.unit]
Loading