Skip to content

Commit

Permalink
bug(schema): write network-config if instance dir present (#4635)
Browse files Browse the repository at this point in the history
Only write /var/lib/cloud/instance/network-config.json once
datasource is detected. The /var/lib/clound/instance symlink is
created by Init.instancify after datasource detection is complete.

Move creation of /var/lib/cloud/instance/network-config.json into
it's own method _write_network_config_json. It will be called by
any call to apply_network_config.

apply_network_config is called in both Local and Network stages.

In Local stage, apply_network_config is used to either:
 - render the final network config of datasource detected in Local
 - in absence of Local datasource, render basic fallback DHCP on
   primary NIC to allow network to come up before detecting a
   Network datasource

For Network datasources, they will not have been discovered or
instancify'd in Local boot stage, so apply_network_config cannot
yet persist network-config.json.

Defer creation of network-config.json for Network datasources
until until the link /var/lib/cloud/instance exists and
apply_network_config is called in Network stage to render final
network config.

Fixes GH-4630
  • Loading branch information
blackboxsw committed Nov 30, 2023
1 parent cd35cad commit ac72b28
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 33 deletions.
41 changes: 25 additions & 16 deletions cloudinit/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,30 @@ def _get_ipath(self, subname=None):
)
return instance_dir

def _write_network_config_json(self, netcfg: dict):
"""Create /var/lib/cloud/instance/network-config.json
Only attempt once /var/lib/cloud/instance exists which is created
by Init.instancify once a datasource is detected.
"""

if not os.path.islink(self.paths.instance_link):
# Datasource hasn't been detected yet, so we may not
# have visibility to datasource applicable network-config
return
ncfg_instance_path = self.paths.get_ipath_cur("network_config")
network_link = self.paths.get_runpath("network_config")
if os.path.exists(ncfg_instance_path):
# Compare write on delta of current network-config
if netcfg != util.load_json(util.load_file(ncfg_instance_path)):
atomic_helper.write_json(
ncfg_instance_path, netcfg, mode=0o600
)
else:
atomic_helper.write_json(ncfg_instance_path, netcfg, mode=0o600)
if not os.path.islink(network_link):
util.sym_link(ncfg_instance_path, network_link)

def _reflect_cur_instance(self):
# Remove the old symlink and attach a new one so
# that further reads/writes connect into the right location
Expand Down Expand Up @@ -934,22 +958,6 @@ def _find_networking_config(self):
)

def _apply_netcfg_names(self, netcfg):
ncfg_instance_path = self.paths.get_ipath_cur("network_config")
network_link = self.paths.get_runpath("network_config")
if not self._network_already_configured():
if os.path.exists(ncfg_instance_path):
if netcfg != util.load_json(
util.load_file(ncfg_instance_path)
):
atomic_helper.write_json(
ncfg_instance_path, netcfg, mode=0o600
)
else:
atomic_helper.write_json(
ncfg_instance_path, netcfg, mode=0o600
)
if not os.path.islink(network_link):
util.sym_link(ncfg_instance_path, network_link)
try:
LOG.debug("applying net config names for %s", netcfg)
self.distro.networking.apply_network_config_names(netcfg)
Expand Down Expand Up @@ -1009,6 +1017,7 @@ def should_run_on_boot_event():

# refresh netcfg after update
netcfg, src = self._find_networking_config()
self._write_network_config_json(netcfg)

if netcfg and netcfg.get("version") == 1:
validate_cloudconfig_schema(
Expand Down
85 changes: 85 additions & 0 deletions tests/integration_tests/datasources/test_none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""DataSourceNone integration tests on LXD."""
import json

from pycloudlib.lxd.instance import LXDInstance

from tests.integration_tests.decorators import retry
from tests.integration_tests.instances import IntegrationInstance
from tests.integration_tests.util import verify_clean_log

DS_NONE_BASE_CFG = """\
datasource_list: [None]
datasource:
None:
metadata:
instance-id: my-iid-uuid
userdata_raw: |
#cloud-config
runcmd:
- touch /var/tmp/success-with-datasource-none
"""


@retry(tries=30, delay=1)
def wait_for_cloud_init_status_file(instance: LXDInstance):
"""Wait for a non-empty status.json indicating cloud-init has started.
We don't wait for cloud-init to complete in this scenario as the failure
path on some images leaves us with cloud-init status blocking
indefinitely in the "running" state.
"""
status_file = instance.read_from_file("/run/cloud-init/status.json")
assert len(status_file)


def test_datasource_none_discovery(client: IntegrationInstance):
"""Integration test for #4635.
Test that DataSourceNone detection (used by live installers) doesn't
generate errors or warnings.
"""
log = client.read_from_file("/var/log/cloud-init.log")
verify_clean_log(log)
# Limit datasource detection to DataSourceNone.
client.write_to_file(
"/etc/cloud/cloud.cfg.d/99-force-dsnone.cfg", DS_NONE_BASE_CFG
)
if client.settings.PLATFORM in ["lxd_container"]:
# DataSourceNone provides no network_config.
# To avoid changing network config from platform desired net cfg
# to fallback config, copy out the rendered network config
# to /etc/cloud/cloud.cfg.d/99-orig-net.cfg so it is
# setup by the DataSourceNone case as well.
# Otherwise (LXD specifically) we'll have network torn down due
# to virtual NICs present which results in not network being
# brought up when we emit fallback config which attempts to
# match on PermanentMACAddress. LP:#2022947
client.execute(
"cp /etc/netplan/50-cloud-init.yaml"
" /etc/cloud/cloud.cfg.d/99-orig-net.cfg"
)
client.execute("cloud-init clean --logs --reboot")
wait_for_cloud_init_status_file(client)
status = json.loads(client.execute("cloud-init status --format=json"))
assert [] == status["errors"]
expected_warnings = set(
[
"Used fallback datasource",
"Running ['netplan', 'apply'] resulted in stderr output: Cannot"
" call Open vSwitch: ovsdb-server.service is not running.\nFailed"
" to connect system bus: No such file or directory\nFalling back"
" to a hard restart of systemd-networkd.service\n",
]
)
unexpected_warnings = []
unexpected_warnings = set(
status["recoverable_errors"].get("WARNING", [])
).difference(expected_warnings)
if unexpected_warnings:
raise AssertionError(
f"Unexpected recoverable errors: {list(unexpected_warnings)}"
)
client.execute("cloud-init status --wait")
log = client.read_from_file("/var/log/cloud-init.log")
verify_clean_log(log)
assert client.execute("test -f /var/tmp/success-with-datasource-none").ok
8 changes: 8 additions & 0 deletions tests/integration_tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ def verify_clean_log(log: str, ignore_deprecations: bool = True):
warning_texts.append(
"canonical-livepatch returned error when checking status"
)
if "found network data from DataSourceNone" in log:
warning_texts.append("Used fallback datasource")
warning_texts.append(
"Running ['netplan', 'apply'] resulted in stderr output: Cannot"
" call Open vSwitch: ovsdb-server.service is not running.\nFailed"
" to connect system bus: No such file or directory\nFalling back"
" to a hard restart of systemd-networkd.service\n"
)
if "oracle" in log:
# LP: #1842752
lease_exists_text = "Stderr: RTNETLINK answers: File exists"
Expand Down
5 changes: 4 additions & 1 deletion tests/unittests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import responses

import cloudinit
from cloudinit import cloud, distros
from cloudinit import atomic_helper, cloud, distros
from cloudinit import helpers as ch
from cloudinit import subp, util
from cloudinit.config.schema import (
Expand Down Expand Up @@ -293,6 +293,9 @@ def patchUtils(self, new_root):
("sym_link", -1),
("copy", -1),
],
atomic_helper: [
("write_json", 1),
],
}
for (mod, funcs) in patch_funcs.items():
for (f, am) in funcs:
Expand Down
22 changes: 21 additions & 1 deletion tests/unittests/runs/test_simple_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import copy
import os

from cloudinit import safeyaml, stages, util
from cloudinit import atomic_helper, safeyaml, stages, util
from cloudinit.config.modules import Modules
from cloudinit.settings import PER_INSTANCE
from cloudinit.sources import NetworkConfigSource
from tests.unittests import helpers


Expand Down Expand Up @@ -47,6 +48,15 @@ def setUp(self):
def test_none_ds_populates_var_lib_cloud(self):
"""Init and run_section default behavior creates appropriate dirs."""
# Now start verifying whats created
netcfg = {
"version": 1,
"config": [{"type": "physical", "name": "eth9"}],
}

def fake_network_config():
return netcfg, NetworkConfigSource.FALLBACK

self.assertFalse(os.path.exists("/var/lib/cloud"))
initer = stages.Init()
initer.read_cfg()
initer.initialize()
Expand All @@ -55,10 +65,20 @@ def test_none_ds_populates_var_lib_cloud(self):
self.assertTrue(os.path.isdir(os.path.join("/var/lib/cloud", d)))

initer.fetch()
self.assertFalse(os.path.islink("var/lib/cloud/instance"))
iid = initer.instancify()
self.assertEqual(iid, "iid-datasource-none")
initer.update()
self.assertTrue(os.path.islink("var/lib/cloud/instance"))
initer._find_networking_config = fake_network_config
self.assertFalse(
os.path.exists("/var/lib/cloud/instance/network-config.json")
)
initer.apply_network_config(False)
self.assertEqual(
f"{atomic_helper.json_dumps(netcfg)}\n",
util.load_file("/var/lib/cloud/instance/network-config.json"),
)

def test_none_ds_runs_modules_which_do_not_define_distros(self):
"""Any modules which do not define a distros attribute are run."""
Expand Down
37 changes: 22 additions & 15 deletions tests/unittests/test_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from cloudinit import sources, stages
from cloudinit.event import EventScope, EventType
from cloudinit.sources import NetworkConfigSource
from cloudinit.util import write_file
from cloudinit.util import sym_link, write_file
from tests.unittests.helpers import mock
from tests.unittests.util import TEST_INSTANCE_ID, FakeDataSource

Expand All @@ -28,7 +28,8 @@ def setup(self, tmpdir):
"paths": {"cloud_dir": self.tmpdir, "run_dir": self.tmpdir},
}
}
tmpdir.mkdir("instance")
tmpdir.mkdir("instance-uuid")
sym_link(tmpdir.join("instance-uuid"), tmpdir.join("instance"))
self.init.datasource = FakeDataSource(paths=self.init.paths)
self._real_is_new_instance = self.init.is_new_instance
self.init.is_new_instance = mock.Mock(return_value=True)
Expand Down Expand Up @@ -393,9 +394,12 @@ def fake_network_config():
assert caplog.records[0].levelname == "INFO"
assert f"network config is disabled by {disable_file}" in caplog.text

@pytest.mark.parametrize("instance_dir_present", (True, False))
@mock.patch("cloudinit.net.get_interfaces_by_mac")
@mock.patch("cloudinit.distros.ubuntu.Distro")
def test_apply_network_on_new_instance(self, m_ubuntu, m_macs):
def test_apply_network_on_new_instance(
self, m_ubuntu, m_macs, instance_dir_present
):
"""Call distro apply_network_config methods on is_new_instance."""
net_cfg = {
"version": 1,
Expand All @@ -415,20 +419,26 @@ def fake_network_config():
m_macs.return_value = {"42:42:42:42:42:42": "eth9"}

self.init._find_networking_config = fake_network_config

if not instance_dir_present:
self.tmpdir.join("instance").remove()
self.tmpdir.join("instance-uuid").remove()
self.init.apply_network_config(True)
networking = self.init.distro.networking
networking.apply_network_config_names.assert_called_with(net_cfg)
self.init.distro.apply_network_config.assert_called_with(
net_cfg, bring_up=True
)
assert net_cfg == json.loads(
self.tmpdir.join("instance/network-config.json").read()
)
assert net_cfg == json.loads(
self.tmpdir.join("network-config.json").read()
)
assert os.path.islink(self.tmpdir.join("network-config.json"))
if instance_dir_present:
assert net_cfg == json.loads(
self.tmpdir.join("network-config.json").read()
)
assert os.path.islink(self.tmpdir.join("network-config.json"))
else:
for path in (
"instance/network-config.json",
"network-config.json",
):
assert not self.tmpdir.join(path).exists()

@mock.patch("cloudinit.distros.ubuntu.Distro")
def test_apply_network_on_same_instance_id(self, m_ubuntu, caplog):
Expand Down Expand Up @@ -526,12 +536,9 @@ def test_apply_network_disabled_when_no_default_boot(
self, m_ubuntu, m_macs, caplog
):
"""Don't apply network if datasource has no BOOT event."""
net_cfg = self._apply_network_setup(m_macs)
self._apply_network_setup(m_macs)
self.init.apply_network_config(True)
self.init.distro.apply_network_config.assert_not_called()
assert net_cfg == json.loads(
self.tmpdir.join("network-config.json").read()
)
assert (
"No network config applied. Neither a new instance nor datasource "
"network update allowed" in caplog.text
Expand Down

0 comments on commit ac72b28

Please sign in to comment.