From 6eb22fb07f3f73d1399563cbd1c031140297063a Mon Sep 17 00:00:00 2001
From: James Falcon
Date: Tue, 12 Dec 2023 13:17:13 -0600
Subject: [PATCH] wait on systemd if wait=True, otherwise error to stderr

Replace the fixed max_wait timeout of
_get_error_or_running_from_systemd_with_retry() with a keyword-only
`wait` flag.

When wait is true, a failing systemctl call is retried every 0.25
seconds until it succeeds. When wait is false, systemctl is tried once;
on failure a warning that includes systemctl's stderr is printed to
stderr and None is returned. In both modes a failure is ignored (None is
returned without a warning) when the existing status is already RUNNING
or DEGRADED_RUNNING.

get_status_details() gains a `wait` parameter (default False) and
handle_status_args() forwards args.wait to it.
---
Review notes (this section is ignored when the patch is applied):

 * get_status_details() now passes wait=wait by keyword; the parameter
   is keyword-only, so the positional call raised TypeError.
 * Dropped the initial `last_exception = subp.ProcessExecutionError`
   assignment: it bound the exception class rather than an instance and
   was never read, because the warning is only reached after the except
   clause has assigned the caught exception.
 * Removed the stray comma inside print() so the warning is a single
   string instead of two arguments joined by an extra space.
 * The blob ids on the index lines are those of the original submission.

 cloudinit/cmd/status.py            | 30 +++++++++++++++++++-----------
 tests/unittests/cmd/test_status.py | 12 ++++++------
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/cloudinit/cmd/status.py b/cloudinit/cmd/status.py
index d0237076121b..d952c20a7064 100644
--- a/cloudinit/cmd/status.py
+++ b/cloudinit/cmd/status.py
@@ -138,7 +138,7 @@ def handle_status_args(name, args) -> int:
     """Handle calls to 'cloud-init status' as a subcommand."""
     # Read configured paths
     paths = read_cfg_paths()
-    details = get_status_details(paths)
+    details = get_status_details(paths, args.wait)
     if args.wait:
         while details.status in (
             UXAppStatus.NOT_RUN,
@@ -148,7 +148,7 @@ def handle_status_args(name, args) -> int:
             if args.format == "tabular":
                 sys.stdout.write(".")
                 sys.stdout.flush()
-            details = get_status_details(paths)
+            details = get_status_details(paths, args.wait)
             sleep(0.25)
     details_dict: Dict[str, Union[None, str, List[str], Dict[str, Any]]] = {
         "datasource": details.datasource,
@@ -329,7 +329,7 @@ def _get_error_or_running_from_systemd() -> Optional[UXAppStatus]:


 def _get_error_or_running_from_systemd_with_retry(
-    existing_status, max_wait=5
+    existing_status: UXAppStatus, *, wait: bool
 ) -> Optional[UXAppStatus]:
     """Get systemd status and retry if dbus isn't ready.

@@ -338,26 +338,32 @@ def _get_error_or_running_from_systemd_with_retry(
     then we should retry on systemd status so we don't incorrectly report
     error state while cloud-init is still running.
     """
-    start_time = time.time()
-    while time.time() - start_time < max_wait:
+    while True:
         try:
             return _get_error_or_running_from_systemd()
-        except subp.ProcessExecutionError:
+        except subp.ProcessExecutionError as e:
+            last_exception = e
             if existing_status in (
                 UXAppStatus.DEGRADED_RUNNING,
                 UXAppStatus.RUNNING,
             ):
                 return None
-            sleep(0.25)
+            if wait:
+                sleep(0.25)
+            else:
+                break
     print(
-        f"Failed to get status from systemd after {max_wait} seconds. "
-        "Cloud-init may still be running.",
+        "Failed to get status from systemd. "
+        "Cloud-init status may be inaccurate. "
+        f"Error from systemctl: {last_exception.stderr}",
         file=sys.stderr,
     )
     return None


-def get_status_details(paths: Optional[Paths] = None) -> StatusDetails:
+def get_status_details(
+    paths: Optional[Paths] = None, wait: bool = False
+) -> StatusDetails:
     """Return a dict with status, details and errors.

     @param paths: An initialized cloudinit.helpers.paths object.
@@ -428,7 +434,9 @@ def get_status_details(paths: Optional[Paths] = None) -> StatusDetails:
         UXAppStatus.NOT_RUN,
         UXAppStatus.DISABLED,
     ):
-        systemd_status = _get_error_or_running_from_systemd_with_retry(status)
+        systemd_status = _get_error_or_running_from_systemd_with_retry(
+            status, wait=wait
+        )
         if systemd_status:
             status = systemd_status

diff --git a/tests/unittests/cmd/test_status.py b/tests/unittests/cmd/test_status.py
index abb3b7e3838f..6e4eac4bc240 100644
--- a/tests/unittests/cmd/test_status.py
+++ b/tests/unittests/cmd/test_status.py
@@ -960,7 +960,7 @@ def test_exception_while_running(self, mocker, capsys):
         )
         assert (
             _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.RUNNING, max_wait=1000
+                UXAppStatus.RUNNING, wait=True
             )
             is None
         )
@@ -988,14 +988,14 @@ def test_retry(self, mocker, capsys):
         )
         assert (
             _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.ERROR, max_wait=1000
+                UXAppStatus.ERROR, wait=True
             )
             is UXAppStatus.RUNNING
         )
         assert 3 == m_subp.call_count
         assert "Failed to get status" not in capsys.readouterr().err

-    def test_retry_timeout(self, mocker, capsys):
+    def test_retry_no_wait(self, mocker, capsys):
         m_subp = mocker.patch(
             f"{M_PATH}subp.subp",
             side_effect=subp.ProcessExecutionError(
@@ -1006,12 +1006,12 @@ def test_retry_timeout(self, mocker, capsys):
         mocker.patch("time.time", side_effect=[1, 2, 50])
         assert (
             _get_error_or_running_from_systemd_with_retry(
-                UXAppStatus.ERROR, max_wait=5
+                UXAppStatus.ERROR, wait=False
             )
             is None
         )
         assert 1 == m_subp.call_count
         assert (
-            "Failed to get status from systemd after 5 seconds. "
-            "Cloud-init may still be running."
+            "Failed to get status from systemd. "
+            "Cloud-init status may be inaccurate."
         ) in capsys.readouterr().err