Skip to content

Commit

Permalink
fix: uninstall custom signal handlers before shutdown (canonical#5913)
Browse files Browse the repository at this point in the history
  • Loading branch information
holmanb authored and TheRealFalcon committed Jan 13, 2025
1 parent c9b9eef commit 4d559e7
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 33 deletions.
5 changes: 3 additions & 2 deletions cloudinit/config/cc_ansible.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from copy import deepcopy
from typing import Optional

from cloudinit import lifecycle, subp
from cloudinit import lifecycle, signal_handler, subp
from cloudinit.cloud import Cloud
from cloudinit.config import Config
from cloudinit.config.schema import MetaSchema
Expand Down Expand Up @@ -63,7 +63,8 @@ def do_as(self, command: list, **kwargs):
return self.distro.do_as(command, self.run_user, **kwargs)

def subp(self, command, **kwargs):
return subp.subp(command, update_env=self.env, **kwargs)
with signal_handler.suspend_crash():
return subp.subp(command, update_env=self.env, **kwargs)

@abc.abstractmethod
def is_installed(self):
Expand Down
7 changes: 5 additions & 2 deletions cloudinit/config/cc_bootcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import logging

from cloudinit import subp, temp_utils, util
from cloudinit import signal_handler, subp, temp_utils, util
from cloudinit.cloud import Cloud
from cloudinit.config import Config
from cloudinit.config.schema import MetaSchema
Expand Down Expand Up @@ -50,7 +50,10 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
try:
iid = cloud.get_instance_id()
env = {"INSTANCE_ID": str(iid)} if iid else {}
subp.subp(["/bin/sh", tmpf.name], update_env=env, capture=False)
with signal_handler.suspend_crash():
subp.subp(
["/bin/sh", tmpf.name], update_env=env, capture=False
)
except Exception:
util.logexc(LOG, "Failed to run bootcmd module %s", name)
raise
12 changes: 8 additions & 4 deletions cloudinit/config/cc_package_update_upgrade_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
import time

from cloudinit import subp, util
from cloudinit import signal_handler, subp, util
from cloudinit.cloud import Cloud
from cloudinit.config import Config
from cloudinit.config.schema import MetaSchema
Expand Down Expand Up @@ -48,7 +48,10 @@ def _fire_reboot(
wait_attempts: int = 6, initial_sleep: int = 1, backoff: int = 2
):
"""Run a reboot command and panic if it doesn't happen fast enough."""
subp.subp(REBOOT_CMD)
# systemd will kill cloud-init with a signal
# this is expected so don't behave as if this is a failure state
with signal_handler.suspend_crash():
subp.subp(REBOOT_CMD)
start = time.monotonic()
wait_time = initial_sleep
for _i in range(wait_attempts):
Expand Down Expand Up @@ -106,8 +109,9 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
break
if (upgrade or pkglist) and reboot_if_required and reboot_fn_exists:
try:
LOG.warning(
"Rebooting after upgrade or install per %s", reboot_marker
LOG.info(
"***WARNING*** Rebooting after upgrade or install per %s",
reboot_marker,
)
# Flush the above warning + anything else out...
flush_loggers(LOG)
Expand Down
7 changes: 5 additions & 2 deletions cloudinit/config/cc_power_state_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import subprocess
import time

from cloudinit import subp, util
from cloudinit import signal_handler, subp, util
from cloudinit.cloud import Cloud
from cloudinit.config import Config
from cloudinit.config.schema import MetaSchema
Expand Down Expand Up @@ -217,4 +217,7 @@ def fatal(msg):
except Exception as e:
fatal("Unexpected Exception when checking condition: %s" % e)

func(*args)
# systemd could kill this process with a signal before it exits
# this is expected, so don't crash
with signal_handler.suspend_crash():
func(*args)
92 changes: 75 additions & 17 deletions cloudinit/signal_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,64 @@
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
#
# This file is part of cloud-init. See LICENSE file for license information.
import contextlib
import inspect
import logging
import signal
import sys
import threading
import types
from io import StringIO
from typing import Callable, Dict, Final, NamedTuple, Union

from cloudinit import version as vr
from cloudinit.log import log_util

LOG = logging.getLogger(__name__)


BACK_FRAME_TRACE_DEPTH = 3
EXIT_FOR = {
signal.SIGINT: ("Cloud-init %(version)s received SIGINT, exiting...", 1),
signal.SIGTERM: ("Cloud-init %(version)s received SIGTERM, exiting...", 1),
# Can't be caught...
# signal.SIGKILL: ('Cloud-init killed, exiting...', 1),
signal.SIGABRT: ("Cloud-init %(version)s received SIGABRT, exiting...", 1),
SIG_MESSAGE: Final = "Cloud-init {} received {}, exiting\n"
BACK_FRAME_TRACE_DEPTH: Final = 3
SIGNALS: Final[Dict[int, str]] = {
signal.SIGINT: "Cloud-init %(version)s received SIGINT, exiting",
signal.SIGTERM: "Cloud-init %(version)s received SIGTERM, exiting",
signal.SIGABRT: "Cloud-init %(version)s received SIGABRT, exiting",
}


class ExitBehavior(NamedTuple):
exit_code: int
log_level: int


SIGNAL_EXIT_BEHAVIOR_CRASH: Final = ExitBehavior(1, logging.ERROR)
SIGNAL_EXIT_BEHAVIOR_QUIET: Final = ExitBehavior(0, logging.INFO)
_SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_CRASH
_SUSPEND_WRITE_LOCK = threading.RLock()


def inspect_handler(sig: Union[int, Callable, None]) -> None:
"""inspect_handler() logs signal handler state"""
if callable(sig):
# only produce a log when the signal handler isn't in the expected
# default state
if not isinstance(sig, types.BuiltinFunctionType):
LOG.info("Signal state [%s] - previously custom handler.", sig)
elif sig == signal.SIG_IGN:
LOG.info("Signal state [SIG_IGN] - previously ignored.")
elif sig is None:
LOG.info("Signal state [None] - previously not installed from Python.")
elif sig == signal.SIG_DFL:
LOG.info(
"Signal state [%s] - default way of handling signal was "
"previously in use.",
sig,
)
else:
# this should never happen, unless something in Python changes
# https://docs.python.org/3/library/signal.html#signal.getsignal
LOG.warning("Signal state [%s(%s)] - unknown", type(sig), sig)


def _pprint_frame(frame, depth, max_depth, contents):
if depth > max_depth or not frame:
return
Expand All @@ -39,18 +75,40 @@ def _pprint_frame(frame, depth, max_depth, contents):


def _handle_exit(signum, frame):
(msg, rc) = EXIT_FOR[signum]
msg = msg % ({"version": vr.version_string()})
contents = StringIO()
contents.write("%s\n" % (msg))
# in practice we always receive a Signals object but int is possible
name = signum.name if isinstance(signum, signal.Signals) else signum
contents = StringIO(SIG_MESSAGE.format(vr.version_string(), name))
_pprint_frame(frame, 1, BACK_FRAME_TRACE_DEPTH, contents)
log_util.multi_log(contents.getvalue(), log=LOG, log_level=logging.ERROR)
sys.exit(rc)
log_util.multi_log(
contents.getvalue(), log=LOG, log_level=_SIGNAL_EXIT_BEHAVIOR.log_level
)
sys.exit(_SIGNAL_EXIT_BEHAVIOR.exit_code)


def attach_handlers():
"""attach cloud-init's handlers"""
sigs_attached = 0
for signum in EXIT_FOR.keys():
signal.signal(signum, _handle_exit)
sigs_attached += len(EXIT_FOR)
for signum in SIGNALS.keys():
inspect_handler(signal.signal(signum, _handle_exit))
sigs_attached += len(SIGNALS)
return sigs_attached


@contextlib.contextmanager
def suspend_crash():
"""suspend_crash() allows signals to be received without exiting 1
This allow signal handling without a crash where it is expected. The
call stack is still printed if signal is received during this context, but
the return code is 0 and no traceback is printed.
Threadsafe.
"""
global _SIGNAL_EXIT_BEHAVIOR

# If multiple threads simultaneously were to modify this
# global state, this function would not behave as expected.
with _SUSPEND_WRITE_LOCK:
_SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_QUIET
yield
_SIGNAL_EXIT_BEHAVIOR = SIGNAL_EXIT_BEHAVIOR_CRASH
5 changes: 3 additions & 2 deletions cloudinit/subp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from io import TextIOWrapper
from typing import List, Optional, Union

from cloudinit import performance
from cloudinit import performance, signal_handler

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -368,7 +368,8 @@ def runparts(dirp, skip_no_exist=True, exe_prefix=None):
if is_exe(exe_path):
attempted.append(exe_path)
try:
subp(prefix + [exe_path], capture=False)
with signal_handler.suspend_crash():
subp(prefix + [exe_path], capture=False)
except ProcessExecutionError as e:
LOG.debug(e)
failed.append(exe_name)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/integration_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
# Install from a PPA. It MUST start with 'ppa:'
# <file path>
# A path to a valid package to be uploaded and installed
CLOUD_INIT_SOURCE = "NONE"
CLOUD_INIT_SOURCE = "IN_PLACE"

# cloud-init metapackage to install
# Examples: cloud-init, cloud-init-base, cloud-init-smart-os
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests/modules/test_power_state_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Test that the power state config options work as expected.
"""

import re
import time

import pytest
Expand Down Expand Up @@ -52,7 +53,6 @@ def _can_connect(instance):
# This test is marked unstable because even though it should be able to
# run anywhere, I can only get it to run in an lxd container, and even then
# occasionally some timing issues will crop up.
@pytest.mark.unstable
@pytest.mark.skipif(not IS_UBUNTU, reason="Only ever tested on Ubuntu")
@pytest.mark.skipif(
PLATFORM != "lxd_container",
Expand All @@ -64,7 +64,7 @@ class TestPowerChange:
[
("poweroff", "now", "10", "will execute: shutdown -P now msg"),
("reboot", "now", "0", "will execute: shutdown -r now msg"),
("halt", "+1", "0", "will execute: shutdown -H +1 msg"),
("halt", "+1", "0", re.escape("will execute: shutdown -H +1 msg")),
],
)
def test_poweroff(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _isfile(filename: str):
# subp and not really rebooting the system
subp_call = ["/sbin/reboot"]

caplog.set_level(logging.WARNING)
caplog.set_level(logging.INFO)
with mock.patch(
"cloudinit.subp.subp", return_value=SubpResult("{}", "fakeerr")
) as m_subp:
Expand Down
51 changes: 51 additions & 0 deletions tests/unittests/test_signal_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""cloudinit.signal_handler tests"""

import inspect
import signal
from unittest.mock import Mock, patch

import pytest

from cloudinit import signal_handler

REENTRANT = "reentrant"


class TestSignalHandler:

@pytest.mark.parametrize(
"m_args",
[
(signal.SIGINT, inspect.currentframe()),
(9, None),
(signal.SIGTERM, None),
(1, inspect.currentframe()),
],
)
@pytest.mark.parametrize(
"m_suspended",
[
(REENTRANT, 0),
(True, 0),
(False, 1),
],
)
def test_suspend_signal(self, m_args, m_suspended):
"""suspend_crash should prevent crashing (exit 1) on signal
otherwise cloud-init should exit 1
"""
sig, frame = m_args
suspended, rc = m_suspended

with patch.object(signal_handler.sys, "exit", Mock()) as m_exit:
if suspended is True:
with signal_handler.suspend_crash():
signal_handler._handle_exit(sig, frame)
elif suspended == REENTRANT:
with signal_handler.suspend_crash():
with signal_handler.suspend_crash():
signal_handler._handle_exit(sig, frame)
else:
signal_handler._handle_exit(sig, frame)
m_exit.assert_called_with(rc)

0 comments on commit 4d559e7

Please sign in to comment.