From c34f236b4675ca2a5136c8522417b4368f30b322 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Thu, 10 Feb 2022 11:57:57 -0500 Subject: [PATCH] sources/azure: validate IMDS network configuration metadata Due to race conditions and caching, IMDS may return stale or incomplete metadata. Add some validation to detect these scenarios and report appropriate telemetry. Signed-off-by: Chris Patterson --- cloudinit/sources/DataSourceAzure.py | 74 ++++++++++++++++++++++++++- cloudinit/sources/helpers/azure.py | 2 + tests/unittests/sources/test_azure.py | 21 ++++++-- 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 44efd3586665..6a2d67a533f1 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -15,7 +15,7 @@ from enum import Enum from functools import partial from time import sleep, time -from typing import Optional +from typing import List, Optional from xml.dom import minidom import requests @@ -178,6 +178,26 @@ def find_dev_from_busdev(camcontrol_out, busdev): return None +def normalize_mac_address(mac: str): + """Normalize mac address with colons and lower-case.""" + if len(mac) == 12: + mac = ":".join( + [mac[0:2], mac[2:4], mac[4:6], mac[6:8], mac[8:10], mac[10:12]] + ) + + return mac.lower() + + +@azure_ds_telemetry_reporter +def get_hv_netvsc_macs_normalized() -> List[str]: + """Get Hyper-V NICs as normalized MAC addresses.""" + return [ + normalize_mac_address(n[1]) + for n in net.get_interfaces() + if n[2] == "hv_netvsc" + ] + + def execute_or_debug(cmd, fail_ret=None): try: return subp.subp(cmd)[0] @@ -517,6 +537,9 @@ def crawl_metadata(self): # fetch metadata again as it has changed after reprovisioning imds_md = self.get_imds_data_with_api_fallback(retries=10) + # Report errors if IMDS network configuration is missing data. + self.validate_imds_network_metadata(imds_md=imds_md) + self.seed = metadata_source crawled_data.update( { @@ -1534,6 +1557,46 @@ def network_config(self): def region(self): return self.metadata.get("imds", {}).get("compute", {}).get("location") + @azure_ds_telemetry_reporter + def validate_imds_network_metadata(self, imds_md: dict) -> bool: + """Validate IMDS network config and report telemetry for errors.""" + local_macs = get_hv_netvsc_macs_normalized() + + try: + network_config = imds_md["network"] + imds_macs = [ + normalize_mac_address(i["macAddress"]) + for i in network_config["interface"] + ] + except KeyError: + report_diagnostic_event( + "IMDS network metadata has incomplete configuration: %r" + % imds_md.get("network"), + logger_func=LOG.warning, + ) + return False + + missing_macs = [m for m in local_macs if m not in imds_macs] + if not missing_macs: + return True + + report_diagnostic_event( + "IMDS network metadata is missing configuration for NICs %r: %r" + % (missing_macs, network_config), + logger_func=LOG.warning, + ) + + if self._ephemeral_dhcp_ctx and self._ephemeral_dhcp_ctx.iface: + primary_mac = net.get_interface_mac(self._ephemeral_dhcp_ctx.iface) + if primary_mac in missing_macs: + report_diagnostic_event( + "IMDS network metadata is missing primary NIC %r: %r" + % (primary_mac, network_config), + logger_func=LOG.warning, + ) + + return False + def _username_from_imds(imds_data): try: @@ -2183,6 +2246,7 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: # If there are no available IP addresses, then we don't # want to add this interface to the generated config. if not addresses: + LOG.debug("No %s addresses found for: %r", addr_type, intf) continue has_ip_address = True if addr_type == "ipv4": @@ -2218,6 +2282,14 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: if driver and driver == "hv_netvsc": dev_config["match"]["driver"] = driver netconfig["ethernets"][nicname] = dev_config + continue + + LOG.debug( + "No configuration for: %s (dev_config=%r) (has_ip_address=%r)", + nicname, + dev_config, + has_ip_address, + ) return netconfig diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 8e8f5ce5c7c8..166708f47775 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -12,11 +12,13 @@ from contextlib import contextmanager from datetime import datetime from errno import ENOENT +from typing import List from xml.etree import ElementTree from xml.sax.saxutils import escape from cloudinit import ( distros, + net, stages, subp, temp_utils, diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index a6c43ea7c781..33285c3cb21b 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -801,6 +801,22 @@ def _dmi_mocks(key): ), (dsaz, "get_boot_telemetry", mock.MagicMock()), (dsaz, "get_system_info", mock.MagicMock()), + ( + dsaz.net, + "get_interface_mac", + mock.MagicMock(return_value="00:15:5d:69:63:ba"), + ), + ( + dsaz.net, + "get_interfaces", + mock.MagicMock( + return_value=[ + ("dummy0", "9e:65:d6:19:19:01", None, None), + ("eth0", "00:15:5d:69:63:ba", "hv_netvsc", "0x3"), + ("lo", "00:00:00:00:00:00", None, None), + ] + ), + ), (dsaz.subp, "which", lambda x: True), ( dsaz.dmi, @@ -1923,8 +1939,7 @@ def test_fallback_network_config( blacklist_drivers=["mlx4_core", "mlx5_core"], config_driver=True ) - @mock.patch(MOCKPATH + "net.get_interfaces", autospec=True) - def test_blacklist_through_distro(self, m_net_get_interfaces): + def test_blacklist_through_distro(self): """Verify Azure DS updates blacklist drivers in the distro's networking object.""" odata = {"HostName": "myhost", "UserName": "myuser"} @@ -1942,7 +1957,7 @@ def test_blacklist_through_distro(self, m_net_get_interfaces): ) distro.networking.get_interfaces_by_mac() - m_net_get_interfaces.assert_called_with( + dsaz.net.get_interfaces.assert_called_with( blacklist_drivers=dsaz.BLACKLIST_DRIVERS )