Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

azure: remove cloud-init.log reporting via KVP #4715

Merged
merged 6 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions cloudinit/sources/DataSourceAzure.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
get_ip_from_lease_value,
get_metadata_from_fabric,
get_system_info,
push_log_to_kvp,
report_diagnostic_event,
report_dmesg_to_kvp,
report_failure_to_fabric,
)
from cloudinit.url_helper import UrlError
Expand Down Expand Up @@ -313,7 +313,6 @@ def get_resource_disk_on_freebsd(port_id) -> Optional[str]:


class DataSourceAzure(sources.DataSource):

dsname = "Azure"
default_update_events = {
EventScope.NETWORK: {
Expand Down Expand Up @@ -1306,6 +1305,7 @@ def _report_failure(
f"Azure datasource failure occurred: {error.as_encoded_report()}",
logger_func=LOG.error,
)
report_dmesg_to_kvp()
reported = kvp.report_failure_to_host(error)
if host_only:
return reported
Expand Down Expand Up @@ -1365,6 +1365,7 @@ def _report_ready(

:returns: List of SSH keys, if requested.
"""
report_dmesg_to_kvp()
kvp.report_success_to_host()

try:
Expand Down Expand Up @@ -1473,7 +1474,7 @@ def activate(self, cfg, is_new_instance):
preserve_ntfs=self.ds_cfg.get(DS_CFG_KEY_PRESERVE_NTFS, False),
)
finally:
push_log_to_kvp(self.sys_cfg["def_log_file"])
report_dmesg_to_kvp()
return

@property
Expand Down Expand Up @@ -1822,7 +1823,7 @@ def _redact_password(cnt, fname):
if not files:
files = {}
util.ensure_dir(datadir, dirmode)
for (name, content) in files.items():
for name, content in files.items():
fname = os.path.join(datadir, name)
if "ovf-env.xml" in name:
content = _redact_password(content, fname)
Expand Down
70 changes: 2 additions & 68 deletions cloudinit/sources/helpers/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@
import zlib
from contextlib import contextmanager
from datetime import datetime
from errno import ENOENT
from time import sleep, time
from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar, Union
from xml.etree import ElementTree
from xml.sax.saxutils import escape

from cloudinit import distros, subp, temp_utils, url_helper, util, version
from cloudinit.reporting import events
from cloudinit.settings import CFG_BUILTIN

if TYPE_CHECKING:
from cloudinit.sources.azure import errors
Expand All @@ -32,14 +30,6 @@
SYSTEMINFO_EVENT_TYPE = "system-info"
DIAGNOSTIC_EVENT_TYPE = "diagnostic"
COMPRESSED_EVENT_TYPE = "compressed"
# Maximum number of bytes of the cloud-init.log file that can be dumped to KVP
# at once. This number is based on the analysis done on a large sample of
# cloud-init.log files where the P95 of the file sizes was 537KB and the time
# consumed to dump 500KB file was (P95:76, P99:233, P99.9:1170) in ms
MAX_LOG_TO_KVP_LENGTH = 512000
# File to store the last byte of cloud-init.log that was pushed to KVP. This
# file will be deleted with every VM reboot.
LOG_PUSHED_TO_KVP_INDEX_FILE = "/run/cloud-init/log_pushed_to_kvp_index"
azure_ds_reporter = events.ReportEventStack(
name="azure-ds",
description="initialize reporter for azure ds",
Expand Down Expand Up @@ -212,35 +202,8 @@ def report_compressed_event(event_name, event_content):


@azure_ds_telemetry_reporter
def push_log_to_kvp(file_name=CFG_BUILTIN["def_log_file"]):
"""Push a portion of cloud-init.log file or the whole file to KVP
based on the file size.
The first time this function is called after VM boot, It will push the last
n bytes of the log file such that n < MAX_LOG_TO_KVP_LENGTH
If called again on the same boot, it continues from where it left off.
In addition to cloud-init.log, dmesg log will also be collected."""

start_index = get_last_log_byte_pushed_to_kvp_index()

LOG.debug("Dumping cloud-init.log file to KVP")
try:
with open(file_name, "rb") as f:
f.seek(0, os.SEEK_END)
seek_index = max(f.tell() - MAX_LOG_TO_KVP_LENGTH, start_index)
report_diagnostic_event(
"Dumping last {0} bytes of cloud-init.log file to KVP starting"
" from index: {1}".format(f.tell() - seek_index, seek_index),
logger_func=LOG.debug,
)
f.seek(seek_index, os.SEEK_SET)
report_compressed_event("cloud-init.log", f.read())
util.write_file(LOG_PUSHED_TO_KVP_INDEX_FILE, str(f.tell()))
except Exception as ex:
report_diagnostic_event(
"Exception when dumping log file: %s" % repr(ex),
logger_func=LOG.warning,
)

def report_dmesg_to_kvp():
"""Report dmesg to KVP."""
LOG.debug("Dumping dmesg log to KVP")
try:
out, _ = subp.subp(["dmesg"], decode=False, capture=True)
Expand All @@ -252,30 +215,6 @@ def push_log_to_kvp(file_name=CFG_BUILTIN["def_log_file"]):
)


@azure_ds_telemetry_reporter
def get_last_log_byte_pushed_to_kvp_index():
try:
with open(LOG_PUSHED_TO_KVP_INDEX_FILE, "r") as f:
return int(f.read())
except IOError as e:
if e.errno != ENOENT:
report_diagnostic_event(
"Reading LOG_PUSHED_TO_KVP_INDEX_FILE failed: %s." % repr(e),
logger_func=LOG.warning,
)
except ValueError as e:
report_diagnostic_event(
"Invalid value in LOG_PUSHED_TO_KVP_INDEX_FILE: %s." % repr(e),
logger_func=LOG.warning,
)
except Exception as e:
report_diagnostic_event(
"Failed to get the last log byte pushed to KVP: %s." % repr(e),
logger_func=LOG.warning,
)
return 0


@contextmanager
def cd(newdir):
prevdir = os.getcwd()
Expand Down Expand Up @@ -390,7 +329,6 @@ def build_minimal_ovf(


class AzureEndpointHttpClient:

headers = {
"x-ms-agent-name": "WALinuxAgent",
"x-ms-version": "2012-11-30",
Expand Down Expand Up @@ -487,7 +425,6 @@ def _text_from_xpath(self, xpath):


class OpenSSLManager:

certificate_names = {
"private_key": "TransportPrivate.pem",
"certificate": "TransportCert.pem",
Expand Down Expand Up @@ -616,7 +553,6 @@ def parse_certificates(self, certificates_xml):


class GoalStateHealthReporter:

HEALTH_REPORT_XML_TEMPLATE = textwrap.dedent(
"""\
<?xml version="1.0" encoding="utf-8"?>
Expand Down Expand Up @@ -744,8 +680,6 @@ def build_report(

@azure_ds_telemetry_reporter
def _post_health_report(self, document: bytes) -> None:
push_log_to_kvp()

# Whenever report_diagnostic_event(diagnostic_msg) is invoked in code,
# the diagnostic messages are written to special files
# (/var/opt/hyperv/.kvp_pool_*) as Hyper-V KVP messages.
Expand Down
70 changes: 1 addition & 69 deletions tests/unittests/reporting/test_reporting_hyperv.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from cloudinit import util
from cloudinit.reporting import events, instantiated_handler_registry
from cloudinit.reporting.handlers import HyperVKvpReportingHandler, LogHandler
from cloudinit.reporting.handlers import HyperVKvpReportingHandler
from cloudinit.sources.helpers import azure
from tests.unittests.helpers import CiTestCase

Expand Down Expand Up @@ -234,74 +234,6 @@ def test_report_compressed_event(self):
"telemetry", force=False
)

@mock.patch("cloudinit.sources.helpers.azure.report_compressed_event")
@mock.patch("cloudinit.sources.helpers.azure.report_diagnostic_event")
@mock.patch("cloudinit.subp.subp")
def test_push_log_to_kvp_exception_handling(self, m_subp, m_diag, m_com):
reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path)
try:
instantiated_handler_registry.register_item("telemetry", reporter)
log_file = self.tmp_path("cloud-init.log")
azure.MAX_LOG_TO_KVP_LENGTH = 100
azure.LOG_PUSHED_TO_KVP_INDEX_FILE = self.tmp_path(
"log_pushed_to_kvp"
)
with open(log_file, "w") as f:
log_content = "A" * 50 + "B" * 100
f.write(log_content)

m_com.side_effect = Exception("Mock Exception")
azure.push_log_to_kvp(log_file)

# exceptions will trigger diagnostic reporting calls
self.assertEqual(m_diag.call_count, 3)
finally:
instantiated_handler_registry.unregister_item(
"telemetry", force=False
)

@mock.patch("cloudinit.subp.subp")
@mock.patch.object(LogHandler, "publish_event")
def test_push_log_to_kvp(self, publish_event, m_subp):
reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path)
try:
instantiated_handler_registry.register_item("telemetry", reporter)
log_file = self.tmp_path("cloud-init.log")
azure.MAX_LOG_TO_KVP_LENGTH = 100
azure.LOG_PUSHED_TO_KVP_INDEX_FILE = self.tmp_path(
"log_pushed_to_kvp"
)
with open(log_file, "w") as f:
log_content = "A" * 50 + "B" * 100
f.write(log_content)
azure.push_log_to_kvp(log_file)

with open(log_file, "a") as f:
extra_content = "C" * 10
f.write(extra_content)
azure.push_log_to_kvp(log_file)

# make sure dmesg is called every time
m_subp.assert_called_with(["dmesg"], capture=True, decode=False)

for call_arg in publish_event.call_args_list:
event = call_arg[0][0]
self.assertNotEqual(
event.event_type, azure.COMPRESSED_EVENT_TYPE
)
self.validate_compressed_kvps(
reporter,
2,
[
log_content[-azure.MAX_LOG_TO_KVP_LENGTH :].encode(),
extra_content.encode(),
],
)
finally:
instantiated_handler_registry.unregister_item(
"telemetry", force=False
)
Comment on lines -285 to -303
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be worth keeping unittest coverage of the dmesg functionality that is still retained.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, done! Moved the call to report_dmesg_to_kvp() up to the Azure data source to decouple it from the walinuxagent shim we'll eventually get rid of. report_compressed_event() is tested in the hyperv reporting handler. I didn't want to do it now, but in a future PR will consolidate these kvp bits into the kvp module.


def validate_compressed_kvps(self, reporter, count, values):
reporter.q.join()
kvps = list(reporter._iterate_kvps(0))
Expand Down
37 changes: 24 additions & 13 deletions tests/unittests/sources/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from cloudinit import distros, dmi, helpers, subp, url_helper
from cloudinit.atomic_helper import b64e, json_dumps
from cloudinit.net import dhcp, ephemeral
from cloudinit.reporting.handlers import HyperVKvpReportingHandler
from cloudinit.sources import UNSET
from cloudinit.sources import DataSourceAzure as dsaz
from cloudinit.sources import InvalidMetaDataException
Expand Down Expand Up @@ -169,6 +168,16 @@ def mock_imds_fetch_metadata_with_api_fallback():
yield m


@pytest.fixture(autouse=True)
def mock_report_dmesg_to_kvp():
with mock.patch(
MOCKPATH + "report_dmesg_to_kvp",
return_value=True,
autospec=True,
) as m:
yield m


@pytest.fixture
def mock_kvp_report_failure_to_host():
with mock.patch(
Expand Down Expand Up @@ -355,17 +364,6 @@ def patched_reported_ready_marker_path(azure_ds, patched_markers_dir_path):
yield reported_ready_marker


@pytest.fixture
def telemetry_reporter(tmp_path):
kvp_file_path = tmp_path / "kvp_pool_file"
kvp_file_path.write_bytes(b"")
reporter = HyperVKvpReportingHandler(kvp_file_path=str(kvp_file_path))

dsaz.kvp.instantiated_handler_registry.register_item("telemetry", reporter)
yield reporter
dsaz.kvp.instantiated_handler_registry.unregister_item("telemetry")


def fake_http_error_for_code(status_code: int):
response_failure = requests.Response()
response_failure.status_code = status_code
Expand Down Expand Up @@ -3637,6 +3635,7 @@ def provisioning_setup(
mock_kvp_report_success_to_host,
mock_netlink,
mock_readurl,
mock_report_dmesg_to_kvp,
mock_subp_subp,
mock_timestamp,
mock_util_ensure_dir,
Expand Down Expand Up @@ -3666,6 +3665,7 @@ def provisioning_setup(
self.mock_kvp_report_success_to_host = mock_kvp_report_success_to_host
self.mock_netlink = mock_netlink
self.mock_readurl = mock_readurl
self.mock_report_dmesg_to_kvp = mock_report_dmesg_to_kvp
self.mock_subp_subp = mock_subp_subp
self.mock_timestmp = mock_timestamp
self.mock_util_ensure_dir = mock_util_ensure_dir
Expand Down Expand Up @@ -3771,6 +3771,9 @@ def test_no_pps(self):
assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1

# Verify dmesg reported via KVP.
assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 1

@pytest.mark.parametrize("pps_type", ["Savable", "Running"])
def test_stale_pps(self, pps_type):
imds_md_source = copy.deepcopy(self.imds_md)
Expand Down Expand Up @@ -3944,6 +3947,9 @@ def test_running_pps(self):
assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2

# Verify dmesg reported via KVP.
assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2

def test_savable_pps(self):
imds_md_source = copy.deepcopy(self.imds_md)
imds_md_source["extended"]["compute"]["ppsType"] = "Savable"
Expand Down Expand Up @@ -4062,6 +4068,9 @@ def test_savable_pps(self):
assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2

# Verify dmesg reported via KVP.
assert len(self.mock_report_dmesg_to_kvp.mock_calls) == 2

@pytest.mark.parametrize(
"fabric_side_effect",
[
Expand Down Expand Up @@ -4522,13 +4531,14 @@ def test_errors(

class TestReportFailure:
@pytest.mark.parametrize("kvp_enabled", [False, True])
def report_host_only_kvp_enabled(
def test_report_host_only_kvp_enabled(
self,
azure_ds,
kvp_enabled,
mock_azure_report_failure_to_fabric,
mock_kvp_report_failure_to_host,
mock_kvp_report_success_to_host,
mock_report_dmesg_to_kvp,
):
mock_kvp_report_failure_to_host.return_value = kvp_enabled
error = errors.ReportableError(reason="foo")
Expand All @@ -4538,6 +4548,7 @@ def report_host_only_kvp_enabled(
assert mock_kvp_report_failure_to_host.mock_calls == [mock.call(error)]
assert mock_kvp_report_success_to_host.mock_calls == []
assert mock_azure_report_failure_to_fabric.mock_calls == []
assert mock_report_dmesg_to_kvp.mock_calls == [mock.call()]


class TestValidateIMDSMetadata:
Expand Down
Loading
Loading