From 67c90342b59a9edc7d367b341720fd50a3f146d5 Mon Sep 17 00:00:00 2001 From: PengpengSun <40026211+PengpengSun@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:39:13 +0800 Subject: [PATCH] fix: Fall back to cached local ds if no valid ds found (#4997) Rebooting an instance which has finished VMware guest customization with DataSourceVMware will load DataSourceNone because metadata is NOT available. This is mostly a re-post of PR#229, with a few differences: 1. Let ds decide if fallback is allowed, not always fall back to previous cached LOCAL ds. 2. Do not compare the instance-id of the cached ds with the previous instance-id, because I think they are always identical. Fixes GH-3402 --- cloudinit/sources/DataSourceVMware.py | 14 +++++++++- cloudinit/sources/__init__.py | 13 +++++++++ cloudinit/stages.py | 40 +++++++++++++++++---------- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/cloudinit/sources/DataSourceVMware.py b/cloudinit/sources/DataSourceVMware.py index 2f8322c4dbb..888060c9bd9 100644 --- a/cloudinit/sources/DataSourceVMware.py +++ b/cloudinit/sources/DataSourceVMware.py @@ -176,7 +176,7 @@ def _get_data(self): break if not self.data_access_method: - LOG.error("failed to find a valid data access method") + LOG.debug("failed to find a valid data access method") return False LOG.info("using data access method %s", self._get_subplatform()) @@ -270,6 +270,18 @@ def get_instance_id(self): self.metadata["instance-id"] = str(id_file.read()).rstrip().lower() return self.metadata["instance-id"] + def check_if_fallback_is_allowed(self): + if ( + self.data_access_method + and self.data_access_method == DATA_ACCESS_METHOD_IMC + and is_vmware_platform() + ): + LOG.debug( + "Cache fallback is allowed for : %s", self._get_subplatform() + ) + return True + return False + def get_public_ssh_keys(self): for key_name in ( "public-keys-data", diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 65222b29b37..839c4542c26 100644 ---
a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -347,6 +347,9 @@ def _unpickle(self, ci_pkl_version: int) -> None: if not hasattr(self, key): setattr(self, key, value) + if not hasattr(self, "check_if_fallback_is_allowed"): + setattr(self, "check_if_fallback_is_allowed", lambda: False) + if hasattr(self, "userdata") and self.userdata is not None: # If userdata stores MIME data, on < python3.6 it will be # missing the 'policy' attribute that exists on >=python3.6. @@ -926,6 +929,16 @@ def check_instance_id(self, sys_cfg): # quickly (local check only) if self.instance_id is still return False + def check_if_fallback_is_allowed(self): + """check_if_fallback_is_allowed() + Checks if a cached ds is allowed to be restored when no valid ds is + found in local mode by checking instance-id and searching valid data + through ds list. + + @return True if a ds allows fallback, False otherwise. + """ + return False + @staticmethod def _determine_dsmode(candidates, default=None, valid=None): # return the first candidate that is non None, warn if not valid diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 94caa9c4def..c228805d11e 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -359,20 +359,32 @@ def _get_data_source(self, existing) -> sources.DataSource: LOG.debug(myrep.description) if not ds: - util.del_file(self.paths.instance_link) - (cfg_list, pkg_list) = self._get_datasources() - # Deep copy so that user-data handlers can not modify - # (which will affect user-data handlers down the line...) - (ds, dsname) = sources.find_source( - self.cfg, - self.distro, - self.paths, - copy.deepcopy(self.ds_deps), - cfg_list, - pkg_list, - self.reporter, - ) - LOG.info("Loaded datasource %s - %s", dsname, ds) + try: + cfg_list, pkg_list = self._get_datasources() + # Deep copy so that user-data handlers can not modify + # (which will affect user-data handlers down the line...) 
+ ds, dsname = sources.find_source( + self.cfg, + self.distro, + self.paths, + copy.deepcopy(self.ds_deps), + cfg_list, + pkg_list, + self.reporter, + ) + util.del_file(self.paths.instance_link) + LOG.info("Loaded datasource %s - %s", dsname, ds) + except sources.DataSourceNotFoundException as e: + if existing != "check": + raise e + ds = self._restore_from_cache() + if ds and ds.check_if_fallback_is_allowed(): + LOG.info( + "Restored fallback datasource from checked cache: %s", + ds, + ) + else: + raise e self.datasource = ds # Ensure we adjust our path members datasource # now that we have one (thus allowing ipath to be used)