Skip to content

Commit

Permalink
added TLS support for worker checks (#59)
Browse files Browse the repository at this point in the history
* added TLS support for worker checks

* lint

* static fix
  • Loading branch information
PietroPasotti authored Aug 20, 2024
1 parent 4f26423 commit afcfbec
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 30 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "cosl"
version = "0.0.22"
version = "0.0.23"
authors = [
{ name="sed-i", email="82407168+sed-i@users.noreply.github.com" },
]
Expand Down
31 changes: 15 additions & 16 deletions src/cosl/coordinated_workers/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from enum import Enum
from functools import partial
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, TypedDict
from typing import Any, Callable, Dict, List, Optional, Tuple, TypedDict, Union

import ops
import tenacity
Expand Down Expand Up @@ -68,7 +68,7 @@ def __init__(
name: str,
pebble_layer: Callable[["Worker"], Layer],
endpoints: _EndpointMapping,
readiness_check_endpoint: Optional[str] = None,
readiness_check_endpoint: Optional[Union[str, Callable[["Worker"], str]]] = None,
):
"""Constructor for a Worker object.
Expand All @@ -88,7 +88,12 @@ def __init__(
self._container = self._charm.unit.get_container(name)

self._endpoints = endpoints
self._readiness_check_endpoint = readiness_check_endpoint
# turn str to Callable[[Worker], str]
self._readiness_check_endpoint: Optional[Callable[[Worker], str]]
if isinstance(readiness_check_endpoint, str):
self._readiness_check_endpoint = lambda _: readiness_check_endpoint
else:
self._readiness_check_endpoint = readiness_check_endpoint

self.cluster = ClusterRequirer(
charm=self._charm,
Expand Down Expand Up @@ -129,18 +134,12 @@ def _on_pebble_ready(self, _: ops.PebbleReadyEvent):

def _on_pebble_check_failed(self, event: ops.PebbleCheckFailedEvent):
if event.info.name == "ready":
logger.warning(
f"Pebble `ready` check on {self._readiness_check_endpoint} started to fail: "
f"worker node is down."
)
logger.warning("Pebble `ready` check started to fail: " "worker node is down.")
# collect-status will detect that we're not ready and set waiting status.

def _on_pebble_check_recovered(self, event: ops.PebbleCheckFailedEvent):
if event.info.name == "ready":
logger.info(
f"Pebble `ready` check on {self._readiness_check_endpoint} is passing: "
f"worker node is up."
)
logger.info("Pebble `ready` check is now passing: " "worker node is up.")
# collect-status will detect that we're ready and set active status.

def _on_worker_config_received(self, _: ops.EventBase):
Expand Down Expand Up @@ -197,7 +196,7 @@ def status(self) -> ServiceEndpointStatus:
logger.info("All services are down.")
return ServiceEndpointStatus.down

with urllib.request.urlopen(check_endpoint) as response:
with urllib.request.urlopen(check_endpoint(self)) as response:
html: bytes = response.read()

# ready response should simply be a string:
Expand Down Expand Up @@ -240,14 +239,14 @@ def _on_collect_status(self, e: ops.CollectStatusEvent):
BlockedStatus("Invalid or no roles assigned: please configure some valid roles")
)

if self._readiness_check_endpoint:
try:
status = self.status
if status == ServiceEndpointStatus.starting:
e.add_status(WaitingStatus("Starting..."))
elif status == ServiceEndpointStatus.down:
e.add_status(BlockedStatus("node down (see logs)"))
else:
logger.debug("Unable to determine worker readiness: missing an endpoint to check.")
except WorkerError:
logger.debug("Unable to determine worker readiness: no endpoint given.")

e.add_status(
ActiveStatus(
Expand Down Expand Up @@ -341,7 +340,7 @@ def _add_readiness_check(self, new_layer: Layer):
return

new_layer.checks["ready"] = Check(
"ready", {"override": "replace", "http": {"url": self._readiness_check_endpoint}}
"ready", {"override": "replace", "http": {"url": self._readiness_check_endpoint(self)}}
)

def _update_cluster_relation(self) -> None:
Expand Down
35 changes: 22 additions & 13 deletions tests/test_coordinated_workers/test_worker_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@
from cosl.coordinated_workers.worker import Worker, WorkerError


@pytest.fixture(params=[True, False])
def tls(request):
return request.param


@contextmanager
def _urlopen_patch(url: str, resp):
if url == "http://localhost:3200/ready":
def _urlopen_patch(url: str, resp: str, tls: bool):
if url == f"{'https' if tls else 'http'}://localhost:3200/ready":
mm = MagicMock()
mm.read = MagicMock(return_value=resp.encode("utf-8"))
yield mm
Expand All @@ -22,7 +27,7 @@ def _urlopen_patch(url: str, resp):


@pytest.fixture
def ctx():
def ctx(tls):
class MyCharm(CharmBase):
def __init__(self, framework: Framework):
super().__init__(framework)
Expand All @@ -31,9 +36,12 @@ def __init__(self, framework: Framework):
"workload",
lambda _: Layer(""),
{"cluster": "cluster"},
readiness_check_endpoint="http://localhost:3200/ready",
readiness_check_endpoint=self._readiness_check_endpoint,
)

def _readiness_check_endpoint(self, _):
return f"{'https' if tls else 'http'}://localhost:3200/ready"

return Context(
MyCharm,
meta={
Expand Down Expand Up @@ -63,16 +71,17 @@ def base_state(request):


@contextmanager
def endpoint_starting():
def endpoint_starting(tls):
with patch(
"urllib.request.urlopen", new=partial(_urlopen_patch, resp="foo\nStarting: 10\n bar")
"urllib.request.urlopen",
new=partial(_urlopen_patch, tls=tls, resp="foo\nStarting: 10\n bar"),
):
yield


@contextmanager
def endpoint_ready():
with patch("urllib.request.urlopen", new=partial(_urlopen_patch, resp="ready")):
def endpoint_ready(tls):
with patch("urllib.request.urlopen", new=partial(_urlopen_patch, tls=tls, resp="ready")):
yield


Expand Down Expand Up @@ -110,9 +119,9 @@ def test_status_check_no_config(ctx, base_state, caplog):
assert "Config file not on disk. Skipping status check." in caplog.messages


def test_status_check_starting(ctx, base_state):
def test_status_check_starting(ctx, base_state, tls):
# GIVEN getting the status returns "Starting: X"
with endpoint_starting():
with endpoint_starting(tls):
# AND GIVEN that the config is on disk
with config_on_disk():
# AND GIVEN that the container can connect
Expand All @@ -123,9 +132,9 @@ def test_status_check_starting(ctx, base_state):
assert state_out.unit_status == WaitingStatus("Starting...")


def test_status_check_ready(ctx, base_state):
def test_status_check_ready(ctx, base_state, tls):
# GIVEN getting the status returns "ready"
with endpoint_ready():
with endpoint_ready(tls):
# AND GIVEN that the config is on disk
with config_on_disk():
# AND GIVEN that the container can connect
Expand Down Expand Up @@ -170,7 +179,7 @@ def __init__(self, framework: Framework):
# THEN the charm sets Active: ready, even though we have no idea whether the endpoint is ready.
assert state_out.unit_status == ActiveStatus("read,write ready.")
# AND THEN the charm logs that we can't determine the readiness
assert "Unable to determine worker readiness: missing an endpoint to check." in caplog.messages
assert "Unable to determine worker readiness: no endpoint given." in caplog.messages


def test_access_status_no_endpoint_raises():
Expand Down

0 comments on commit afcfbec

Please sign in to comment.