Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Status Handling in Charmed Operators #1743

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-charmed-katib.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
set -eux
sudo apt update
sudo apt install python3-setuptools
sudo pip3 install black flake8
sudo pip3 install black==20.8b1 flake8

- name: Check black
run: black --check operators
Expand Down
38 changes: 29 additions & 9 deletions operators/katib-controller/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,22 @@
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, MaintenanceStatus
from ops.model import ActiveStatus, MaintenanceStatus, WaitingStatus

logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-controller service."""

Expand All @@ -22,11 +33,6 @@ class Operator(CharmBase):
def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

self._stored.set_default(**self.gen_certs())
self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
Expand All @@ -36,9 +42,11 @@ def set_pod_spec(self, event):
self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
self.model.unit.status = e.status
self._check_leader()

image_details = self._check_image_details()
except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

validating, mutating = yaml.safe_load_all(Path("src/webhooks.yaml").read_text())
Expand Down Expand Up @@ -294,6 +302,18 @@ def gen_certs(self):
"ca": Path("/run/ca.crt").read_text(),
}

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details


if __name__ == "__main__":
main(Operator)
64 changes: 43 additions & 21 deletions operators/katib-db-manager/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,23 @@
logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-db-manager service."""

def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
self.framework.observe(self.on.config_changed, self.set_pod_spec)
Expand All @@ -29,26 +35,18 @@ def __init__(self, framework):
self.framework.observe(self.on["mysql"].relation_changed, self.set_pod_spec)

def set_pod_spec(self, event):
self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
self.model.unit.status = e.status
return
self._check_leader()

try:
relation = self.model.relations["mysql"][0]
unit = next(iter(relation.units))
mysql_data = relation.data[unit]
# Ensure we've got some data sent over the relation
mysql_data["root_password"]
except (IndexError, StopIteration, KeyError):
self.model.unit.status = WaitingStatus(
"Waiting for mysql connection information"
)
image_details = self._check_image_details()

mysql_data = self._check_mysql()
except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

self.model.unit.status = MaintenanceStatus("Setting pod spec")

self.model.pod.set_spec(
{
"version": 3,
Expand Down Expand Up @@ -117,6 +115,30 @@ def set_pod_spec(self, event):

self.model.unit.status = ActiveStatus()

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details

def _check_mysql(self):
try:
relation = self.model.relations["mysql"][0]
unit = next(iter(relation.units))
mysql_data = relation.data[unit]
# Ensure we've got some data sent over the relation
mysql_data["root_password"]
except (IndexError, StopIteration, KeyError):
raise CheckFailed("Waiting for mysql connection information", WaitingStatus)

return mysql_data


if __name__ == "__main__":
main(Operator)
84 changes: 57 additions & 27 deletions operators/katib-ui/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,46 @@
logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-ui service."""

def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

try:
self.interfaces = get_interfaces(self)
except NoVersionsListed as err:
self.model.unit.status = WaitingStatus(str(err))
return
except NoCompatibleVersions as err:
self.model.unit.status = BlockedStatus(str(err))
return
else:
self.model.unit.status = ActiveStatus()

self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
self.framework.observe(
self.on["ingress"].relation_changed,
self.configure_ingress,
self.set_pod_spec,
)

def configure_ingress(self, event):
if self.interfaces["ingress"]:
self.interfaces["ingress"].send_data(
{
"prefix": "/katib/",
"service": self.model.app.name,
"port": self.model.config["port"],
}
)

def set_pod_spec(self, event):
try:

self._check_leader()

interfaces = self._get_interfaces()

image_details = self._check_image_details()

except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

self._configure_ingress(interfaces)

self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
Expand Down Expand Up @@ -115,6 +114,37 @@ def set_pod_spec(self, event):

self.model.unit.status = ActiveStatus()

def _configure_ingress(self, interfaces):
if interfaces["ingress"]:
interfaces["ingress"].send_data(
{
"prefix": "/katib/",
"service": self.model.app.name,
"port": self.model.config["port"],
}
)

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _get_interfaces(self):
try:
interfaces = get_interfaces(self)
except NoVersionsListed as err:
raise CheckFailed(err, WaitingStatus)
except NoCompatibleVersions as err:
raise CheckFailed(err, BlockedStatus)
return interfaces

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details


if __name__ == "__main__":
main(Operator)