From 0a28e05d9f55426164864e3ba3a641521a394a9d Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Thu, 21 Dec 2023 15:11:22 -0500 Subject: [PATCH 1/6] azure: remove cloud-init.log reporting via KVP With the recent changes in failure reporting and diagnostics, the full log is unnecessary overhead. Rename push_log_to_kvp() => report_dmesg_to_kvp() now that it only captures the dmesg log. Signed-off-by: Chris Patterson --- cloudinit/sources/DataSourceAzure.py | 4 +- cloudinit/sources/helpers/azure.py | 67 ++-------------------------- 2 files changed, 5 insertions(+), 66 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 4c5780f767d..f2432bc9a8b 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -44,7 +44,7 @@ get_ip_from_lease_value, get_metadata_from_fabric, get_system_info, - push_log_to_kvp, + report_dmesg_to_kvp, report_diagnostic_event, report_failure_to_fabric, ) @@ -1473,7 +1473,7 @@ def activate(self, cfg, is_new_instance): preserve_ntfs=self.ds_cfg.get(DS_CFG_KEY_PRESERVE_NTFS, False), ) finally: - push_log_to_kvp(self.sys_cfg["def_log_file"]) + report_dmesg_to_kvp() return @property diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 9d85c6b4e40..8cbd8a4befa 100644 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -10,7 +10,6 @@ import zlib from contextlib import contextmanager from datetime import datetime -from errno import ENOENT from time import sleep, time from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar, Union from xml.etree import ElementTree @@ -18,7 +17,6 @@ from cloudinit import distros, subp, temp_utils, url_helper, util, version from cloudinit.reporting import events -from cloudinit.settings import CFG_BUILTIN if TYPE_CHECKING: from cloudinit.sources.azure import errors @@ -32,14 +30,6 @@ SYSTEMINFO_EVENT_TYPE = "system-info" DIAGNOSTIC_EVENT_TYPE = "diagnostic" COMPRESSED_EVENT_TYPE = "compressed" -# Maximum number of bytes of the cloud-init.log file that can be dumped to KVP -# at once. This number is based on the analysis done on a large sample of -# cloud-init.log files where the P95 of the file sizes was 537KB and the time -# consumed to dump 500KB file was (P95:76, P99:233, P99.9:1170) in ms -MAX_LOG_TO_KVP_LENGTH = 512000 -# File to store the last byte of cloud-init.log that was pushed to KVP. This -# file will be deleted with every VM reboot. -LOG_PUSHED_TO_KVP_INDEX_FILE = "/run/cloud-init/log_pushed_to_kvp_index" azure_ds_reporter = events.ReportEventStack( name="azure-ds", description="initialize reporter for azure ds", @@ -212,35 +202,8 @@ def report_compressed_event(event_name, event_content): @azure_ds_telemetry_reporter -def push_log_to_kvp(file_name=CFG_BUILTIN["def_log_file"]): - """Push a portion of cloud-init.log file or the whole file to KVP - based on the file size. - The first time this function is called after VM boot, It will push the last - n bytes of the log file such that n < MAX_LOG_TO_KVP_LENGTH - If called again on the same boot, it continues from where it left off. - In addition to cloud-init.log, dmesg log will also be collected.""" - - start_index = get_last_log_byte_pushed_to_kvp_index() - - LOG.debug("Dumping cloud-init.log file to KVP") - try: - with open(file_name, "rb") as f: - f.seek(0, os.SEEK_END) - seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, start_index) - report_diagnostic_event( - "Dumping last {0} bytes of cloud-init.log file to KVP starting" - " from index: {1}".format(f.tell() - seek_index, seek_index), - logger_func=LOG.debug, - ) - f.seek(seek_index, os.SEEK_SET) - report_compressed_event("cloud-init.log", f.read()) - util.write_file(LOG_PUSHED_TO_KVP_INDEX_FILE, str(f.tell())) - except Exception as ex: - report_diagnostic_event( - "Exception when dumping log file: %s" % repr(ex), - logger_func=LOG.warning, - ) - +def report_dmesg_to_kvp(): + """Report dmesg to KVP.""" LOG.debug("Dumping dmesg log to KVP") try: out, _ = subp.subp(["dmesg"], decode=False, capture=True) @@ -252,30 +215,6 @@ def push_log_to_kvp(file_name=CFG_BUILTIN["def_log_file"]): ) -@azure_ds_telemetry_reporter -def get_last_log_byte_pushed_to_kvp_index(): - try: - with open(LOG_PUSHED_TO_KVP_INDEX_FILE, "r") as f: - return int(f.read()) - except IOError as e: - if e.errno != ENOENT: - report_diagnostic_event( - "Reading LOG_PUSHED_TO_KVP_INDEX_FILE failed: %s." % repr(e), - logger_func=LOG.warning, - ) - except ValueError as e: - report_diagnostic_event( - "Invalid value in LOG_PUSHED_TO_KVP_INDEX_FILE: %s." % repr(e), - logger_func=LOG.warning, - ) - except Exception as e: - report_diagnostic_event( - "Failed to get the last log byte pushed to KVP: %s." % repr(e), - logger_func=LOG.warning, - ) - return 0 - - @contextmanager def cd(newdir): prevdir = os.getcwd() @@ -744,7 +683,7 @@ def build_report( @azure_ds_telemetry_reporter def _post_health_report(self, document: bytes) -> None: - push_log_to_kvp() + report_dmesg_to_kvp() # Whenever report_diagnostic_event(diagnostic_msg) is invoked in code, # the diagnostic messages are written to special files From a8305db79ff0ecd5eb8bc6055c5e357f1220fd78 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Thu, 4 Jan 2024 12:23:41 -0500 Subject: [PATCH 2/6] test_reporting_hyperv: remove obsolete tests Signed-off-by: Chris Patterson --- .../reporting/test_reporting_hyperv.py | 68 ------------------- 1 file changed, 68 deletions(-) diff --git a/tests/unittests/reporting/test_reporting_hyperv.py b/tests/unittests/reporting/test_reporting_hyperv.py index 0464f485d5d..256cd9c28fa 100644 --- a/tests/unittests/reporting/test_reporting_hyperv.py +++ b/tests/unittests/reporting/test_reporting_hyperv.py @@ -234,74 +234,6 @@ def test_report_compressed_event(self): "telemetry", force=False ) - @mock.patch("cloudinit.sources.helpers.azure.report_compressed_event") - @mock.patch("cloudinit.sources.helpers.azure.report_diagnostic_event") - @mock.patch("cloudinit.subp.subp") - def test_push_log_to_kvp_exception_handling(self, m_subp, m_diag, m_com): - reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) - try: - instantiated_handler_registry.register_item("telemetry", reporter) - log_file = self.tmp_path("cloud-init.log") - azure.MAX_LOG_TO_KVP_LENGTH = 100 - azure.LOG_PUSHED_TO_KVP_INDEX_FILE = self.tmp_path( - "log_pushed_to_kvp" - ) - with open(log_file, "w") as f: - log_content = "A" * 50 + "B" * 100 - f.write(log_content) - - m_com.side_effect = Exception("Mock Exception") - azure.push_log_to_kvp(log_file) - - # exceptions will trigger diagnostic reporting calls - self.assertEqual(m_diag.call_count, 3) - finally: - instantiated_handler_registry.unregister_item( - "telemetry", force=False - ) - - @mock.patch("cloudinit.subp.subp") - @mock.patch.object(LogHandler, "publish_event") - def test_push_log_to_kvp(self, publish_event, m_subp): - reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) - try: - instantiated_handler_registry.register_item("telemetry", reporter) - log_file = self.tmp_path("cloud-init.log") - azure.MAX_LOG_TO_KVP_LENGTH = 100 - azure.LOG_PUSHED_TO_KVP_INDEX_FILE = self.tmp_path( - "log_pushed_to_kvp" - ) - with open(log_file, "w") as f: - log_content = "A" * 50 + "B" * 100 - f.write(log_content) - azure.push_log_to_kvp(log_file) - - with open(log_file, "a") as f: - extra_content = "C" * 10 - f.write(extra_content) - azure.push_log_to_kvp(log_file) - - # make sure dmesg is called every time - m_subp.assert_called_with(["dmesg"], capture=True, decode=False) - - for call_arg in publish_event.call_args_list: - event = call_arg[0][0] - self.assertNotEqual( - event.event_type, azure.COMPRESSED_EVENT_TYPE - ) - self.validate_compressed_kvps( - reporter, - 2, - [ - log_content[-azure.MAX_LOG_TO_KVP_LENGTH :].encode(), - extra_content.encode(), - ], - ) - finally: - instantiated_handler_registry.unregister_item( - "telemetry", force=False - ) - def validate_compressed_kvps(self, reporter, count, values): reporter.q.join() kvps = list(reporter._iterate_kvps(0)) From d551e9d1cb75762609763aae43a0db1652033258 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Thu, 4 Jan 2024 12:49:05 -0500 Subject: [PATCH 3/6] isort --- cloudinit/sources/DataSourceAzure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index f2432bc9a8b..99843b03a7c 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -44,8 +44,8 @@ get_ip_from_lease_value, get_metadata_from_fabric, get_system_info, - report_dmesg_to_kvp, report_diagnostic_event, + report_dmesg_to_kvp, report_failure_to_fabric, ) from cloudinit.url_helper import UrlError From b1bcff6272873dc8e0630beab348a4cb872faf23 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Thu, 4 Jan 2024 12:49:56 -0500 Subject: [PATCH 4/6] remove unused import --- cloudinit/sources/helpers/azure.py | 5 ----- tests/unittests/reporting/test_reporting_hyperv.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 8cbd8a4befa..f223ffa9013 100644 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -329,7 +329,6 @@ def build_minimal_ovf( class AzureEndpointHttpClient: - headers = { "x-ms-agent-name": "WALinuxAgent", "x-ms-version": "2012-11-30", @@ -426,7 +425,6 @@ def _text_from_xpath(self, xpath): class OpenSSLManager: - certificate_names = { "private_key": "TransportPrivate.pem", "certificate": "TransportCert.pem", @@ -555,7 +553,6 @@ def parse_certificates(self, certificates_xml): class GoalStateHealthReporter: - HEALTH_REPORT_XML_TEMPLATE = textwrap.dedent( """\ @@ -683,8 +680,6 @@ def build_report( @azure_ds_telemetry_reporter def _post_health_report(self, document: bytes) -> None: - report_dmesg_to_kvp() - # Whenever report_diagnostic_event(diagnostic_msg) is invoked in code, # the diagnostic messages are written to special files # (/var/opt/hyperv/.kvp_pool_*) as Hyper-V KVP messages. diff --git a/tests/unittests/reporting/test_reporting_hyperv.py b/tests/unittests/reporting/test_reporting_hyperv.py index 256cd9c28fa..e9f2b213ff2 100644 --- a/tests/unittests/reporting/test_reporting_hyperv.py +++ b/tests/unittests/reporting/test_reporting_hyperv.py @@ -10,7 +10,7 @@ from cloudinit import util from cloudinit.reporting import events, instantiated_handler_registry -from cloudinit.reporting.handlers import HyperVKvpReportingHandler, LogHandler +from cloudinit.reporting.handlers import HyperVKvpReportingHandler from cloudinit.sources.helpers import azure from tests.unittests.helpers import CiTestCase From 3a96d8b8af219f13c5e051a6e2fa220d5cc1f073 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Mon, 22 Jan 2024 11:32:41 -0500 Subject: [PATCH 5/6] raise call to report_dmesg_to_kvp to _report_success/failure() and add tests --- cloudinit/sources/DataSourceAzure.py | 5 +-- tests/unittests/sources/test_azure.py | 36 +++++++++++++------- tests/unittests/sources/test_azure_helper.py | 35 +++++++++++++++++-- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 99843b03a7c..2324ca515e3 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -313,7 +313,6 @@ def get_resource_disk_on_freebsd(port_id) -> Optional[str]: class DataSourceAzure(sources.DataSource): - dsname = "Azure" default_update_events = { EventScope.NETWORK: { @@ -1306,6 +1305,7 @@ def _report_failure( f"Azure datasource failure occurred: {error.as_encoded_report()}", logger_func=LOG.error, ) + report_dmesg_to_kvp() reported = kvp.report_failure_to_host(error) if host_only: return reported @@ -1365,6 +1365,7 @@ def _report_ready( :returns: List of SSH keys, if requested. """ + report_dmesg_to_kvp() kvp.report_success_to_host() try: @@ -1822,7 +1823,7 @@ def _redact_password(cnt, fname): if not files: files = {} util.ensure_dir(datadir, dirmode) - for (name, content) in files.items(): + for name, content in files.items(): fname = os.path.join(datadir, name) if "ovf-env.xml" in name: content = _redact_password(content, fname) diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 2a477f80239..3c357190e4e 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -169,6 +169,16 @@ def mock_imds_fetch_metadata_with_api_fallback(): yield m +@pytest.fixture(autouse=True) +def mock_report_dmesg_to_kvp(): + with mock.patch( + MOCKPATH + "report_dmesg_to_kvp", + return_value=True, + autospec=True, + ) as m: + yield m + + @pytest.fixture def mock_kvp_report_failure_to_host(): with mock.patch( @@ -355,17 +365,6 @@ def patched_reported_ready_marker_path(azure_ds, patched_markers_dir_path): yield reported_ready_marker -@pytest.fixture -def telemetry_reporter(tmp_path): - kvp_file_path = tmp_path / "kvp_pool_file" - kvp_file_path.write_bytes(b"") - reporter = HyperVKvpReportingHandler(kvp_file_path=str(kvp_file_path)) - - dsaz.kvp.instantiated_handler_registry.register_item("telemetry", reporter) - yield reporter - dsaz.kvp.instantiated_handler_registry.unregister_item("telemetry") - - def fake_http_error_for_code(status_code: int): response_failure = requests.Response() response_failure.status_code = status_code @@ -3637,6 +3636,7 @@ def provisioning_setup( mock_kvp_report_success_to_host, mock_netlink, mock_readurl, + mock_report_dmesg_to_kvp, mock_subp_subp, mock_timestamp, mock_util_ensure_dir, @@ -3666,6 +3666,7 @@ def provisioning_setup( self.mock_kvp_report_success_to_host = mock_kvp_report_success_to_host self.mock_netlink = mock_netlink self.mock_readurl = mock_readurl + self.mock_report_dmesg_to_kvp = mock_report_dmesg_to_kvp self.mock_subp_subp = mock_subp_subp self.mock_timestmp = mock_timestamp self.mock_util_ensure_dir = mock_util_ensure_dir @@ -3771,6 +3772,9 @@ def test_no_pps(self): assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 + # Verify dmesg reported via KVP. + assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 1 + @pytest.mark.parametrize("pps_type", ["Savable", "Running"]) def test_stale_pps(self, pps_type): imds_md_source = copy.deepcopy(self.imds_md) @@ -3944,6 +3948,9 @@ def test_running_pps(self): assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + # Verify dmesg reported via KVP. + assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2 + def test_savable_pps(self): imds_md_source = copy.deepcopy(self.imds_md) imds_md_source["extended"]["compute"]["ppsType"] = "Savable" @@ -4062,6 +4069,9 @@ def test_savable_pps(self): assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + # Verify dmesg reported via KVP. + assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2 + @pytest.mark.parametrize( "fabric_side_effect", [ @@ -4522,13 +4532,14 @@ def test_errors( class TestReportFailure: @pytest.mark.parametrize("kvp_enabled", [False, True]) - def report_host_only_kvp_enabled( + def test_report_host_only_kvp_enabled( self, azure_ds, kvp_enabled, mock_azure_report_failure_to_fabric, mock_kvp_report_failure_to_host, mock_kvp_report_success_to_host, + mock_report_dmesg_to_kvp, ): mock_kvp_report_failure_to_host.return_value = kvp_enabled error = errors.ReportableError(reason="foo") @@ -4538,6 +4549,7 @@ def report_host_only_kvp_enabled( assert mock_kvp_report_failure_to_host.mock_calls == [mock.call(error)] assert mock_kvp_report_success_to_host.mock_calls == [] assert mock_azure_report_failure_to_fabric.mock_calls == [] + assert mock_report_dmesg_to_kvp.mock_calls == [mock.call()] class TestValidateIMDSMetadata: diff --git a/tests/unittests/sources/test_azure_helper.py b/tests/unittests/sources/test_azure_helper.py index 68d7f2287a6..c99593d6d32 100644 --- a/tests/unittests/sources/test_azure_helper.py +++ b/tests/unittests/sources/test_azure_helper.py @@ -6,11 +6,13 @@ from textwrap import dedent from xml.etree import ElementTree from xml.sax.saxutils import escape, unescape +from cloudinit.reporting.handlers import HyperVKvpReportingHandler import pytest import requests from cloudinit import url_helper +from cloudinit.reporting import instantiated_handler_registry from cloudinit.sources.azure import errors from cloudinit.sources.helpers import azure as azure_helper from cloudinit.sources.helpers.azure import WALinuxAgentShim as wa_shim @@ -146,7 +148,6 @@ def test_get_ip_from_lease_value(self, encoded_address, ip_address): class TestGoalStateParsing(CiTestCase): - default_parameters = { "incarnation": 1, "container_id": "MyContainerId", @@ -242,7 +243,6 @@ def test_missing_incarnation_in_goal_state_xml_raises_exc(self): class TestAzureEndpointHttpClient(CiTestCase): - regular_headers = { "x-ms-agent-name": "WALinuxAgent", "x-ms-version": "2012-11-30", @@ -556,7 +556,6 @@ def test_parse_certificates(self, mock_decrypt_certs): class TestGoalStateHealthReporter(CiTestCase): - maxDiff = None default_parameters = { @@ -1648,3 +1647,33 @@ def test_invalid_xml(self, ovf, error): azure_helper.OvfEnvXml.parse_text(ovf) assert str(exc_info.value) == error + + +class TestReportDmesgToKvp: + @mock.patch.object( + azure_helper.subp, "subp", return_value=("dmesg test", "") + ) + @mock.patch.object(azure_helper, "report_compressed_event") + def test_report_dmesg_to_kvp( + self, mock_report_compressed_event, mock_subp + ): + azure_helper.report_dmesg_to_kvp() + + assert mock_subp.mock_calls == [ + mock.call(["dmesg"], decode=False, capture=True) + ] + assert mock_report_compressed_event.mock_calls == [ + mock.call("dmesg", "dmesg test") + ] + + @mock.patch.object(azure_helper.subp, "subp", side_effect=[Exception()]) + @mock.patch.object(azure_helper, "report_compressed_event") + def test_report_dmesg_to_kvp_dmesg_error( + self, mock_report_compressed_event, mock_subp + ): + azure_helper.report_dmesg_to_kvp() + + assert mock_subp.mock_calls == [ + mock.call(["dmesg"], decode=False, capture=True) + ] + assert mock_report_compressed_event.mock_calls == [] From 91dd1259bf98875a49094df839c237dbb387aa93 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Mon, 22 Jan 2024 11:44:53 -0500 Subject: [PATCH 6/6] cleanup unused imports --- tests/unittests/sources/test_azure.py | 1 - tests/unittests/sources/test_azure_helper.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 3c357190e4e..5bf6520d377 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -15,7 +15,6 @@ from cloudinit import distros, dmi, helpers, subp, url_helper from cloudinit.atomic_helper import b64e, json_dumps from cloudinit.net import dhcp, ephemeral -from cloudinit.reporting.handlers import HyperVKvpReportingHandler from cloudinit.sources import UNSET from cloudinit.sources import DataSourceAzure as dsaz from cloudinit.sources import InvalidMetaDataException diff --git a/tests/unittests/sources/test_azure_helper.py b/tests/unittests/sources/test_azure_helper.py index c99593d6d32..bd41e0f4bd1 100644 --- a/tests/unittests/sources/test_azure_helper.py +++ b/tests/unittests/sources/test_azure_helper.py @@ -6,13 +6,11 @@ from textwrap import dedent from xml.etree import ElementTree from xml.sax.saxutils import escape, unescape -from cloudinit.reporting.handlers import HyperVKvpReportingHandler import pytest import requests from cloudinit import url_helper -from cloudinit.reporting import instantiated_handler_registry from cloudinit.sources.azure import errors from cloudinit.sources.helpers import azure as azure_helper from cloudinit.sources.helpers.azure import WALinuxAgentShim as wa_shim