From d3bd2ba6bdf2e8e3c6192f20dde86533cc667a09 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Thu, 22 Oct 2020 16:42:11 -0700 Subject: [PATCH 1/8] fix timestamp for goal state archive --- azurelinuxagent/common/protocol/wire.py | 16 ++++++--- azurelinuxagent/common/utils/archive.py | 47 ++++++++++++------------- tests/protocol/test_wire.py | 38 +++++++++++++++++++- 3 files changed, 72 insertions(+), 29 deletions(-) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 005d95daf6..dbb801f428 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -16,14 +16,13 @@ # # Requires Python 2.6+ and Openssl 1.0+ -import datetime import json import os import random import time import traceback import xml.sax.saxutils as saxutils -from datetime import datetime # pylint: disable=ungrouped-imports +from datetime import datetime # pylint: disable=ungrouped-imports import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger @@ -542,6 +541,8 @@ def __init__(self, endpoint): self._endpoint = endpoint self._goal_state = None self._last_try_update_goal_state_failed = False + self._previous_goal_state_timestamp = None + self._current_goal_state_timestamp = None self._host_plugin = None self.status_blob = StatusBlob(self) self.goal_state_flusher = StateFlusher(conf.get_lib_dir()) @@ -758,11 +759,18 @@ def _update_host_plugin(self, container_id, role_config_name): self._host_plugin.update_role_config_name(role_config_name) def _save_goal_state(self): - try: - self.goal_state_flusher.flush(datetime.utcnow()) + self._current_goal_state_timestamp = datetime.utcnow() + + # If this is the first goal state, the timestamp is current + if not self._previous_goal_state_timestamp: + self._previous_goal_state_timestamp = self._current_goal_state_timestamp + try: + self.goal_state_flusher.flush(self._previous_goal_state_timestamp) except Exception as e: # pylint: disable=C0103 logger.warn("Failed to save the previous goal state to the history folder: {0}", ustr(e)) + finally: + self._previous_goal_state_timestamp = self._current_goal_state_timestamp try: local_file = os.path.join(conf.get_lib_dir(), INCARNATION_FILE_NAME) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 2fa3f12408..96ecad4c1b 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -10,7 +10,6 @@ import azurelinuxagent.common.logger as logger - # pylint: disable=W0105 """ archive.py @@ -41,32 +40,32 @@ MAX_ARCHIVED_STATES = 50 CACHE_PATTERNS = [ - re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) # pylint: disable=W1401 + re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), # pylint: disable=W1401 + re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), # pylint: disable=W1401 + re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) # pylint: disable=W1401 ] # 2018-04-06T08:21:37.142697 # 2018-04-06T08:21:37.142697.zip -ARCHIVE_PATTERNS_DIRECTORY = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$') # pylint: disable=W1401 -ARCHIVE_PATTERNS_ZIP = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$') # pylint: disable=W1401 +ARCHIVE_PATTERNS_DIRECTORY = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$') # pylint: disable=W1401 +ARCHIVE_PATTERNS_ZIP = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$') # pylint: disable=W1401 class StateFlusher(object): def __init__(self, lib_dir): self._source = lib_dir - d = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) # pylint: disable=C0103 + d = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) # pylint: disable=C0103 if not os.path.exists(d): try: fileutil.mkdir(d) - except OSError as e: # pylint: disable=C0103 + except OSError as e: # pylint: disable=C0103 if e.errno != errno.EEXIST: logger.error("{0} : {1}", self._source, e.strerror) def flush(self, timestamp): files = self._get_files_to_archive() - if len(files) == 0: # pylint: disable=len-as-condition + if len(files) == 0: # pylint: disable=len-as-condition return if self._mkdir(timestamp): @@ -79,10 +78,10 @@ def history_dir(self, timestamp): def _get_files_to_archive(self): files = [] - for f in os.listdir(self._source): # pylint: disable=C0103 + for f in os.listdir(self._source): # pylint: disable=C0103 full_path = os.path.join(self._source, f) for pattern in CACHE_PATTERNS: - m = pattern.match(f) # pylint: disable=C0103 + m = pattern.match(f) # pylint: disable=C0103 if m is not None: files.append(full_path) break @@ -90,21 +89,21 @@ def _get_files_to_archive(self): return files def _archive(self, files, timestamp): - for f in files: # pylint: disable=C0103 + for f in files: # pylint: disable=C0103 dst = os.path.join(self.history_dir(timestamp), os.path.basename(f)) shutil.move(f, dst) def _purge(self, files): - for f in files: # pylint: disable=C0103 + for f in files: # pylint: disable=C0103 os.remove(f) def _mkdir(self, timestamp): - d = self.history_dir(timestamp) # pylint: disable=C0103 + d = self.history_dir(timestamp) # pylint: disable=C0103 try: fileutil.mkdir(d, mode=0o700) return True - except IOError as e: # pylint: disable=C0103 + except IOError as e: # pylint: disable=C0103 logger.error("{0} : {1}".format(d, e.strerror)) return False @@ -148,15 +147,15 @@ def __ge__(self, other): class StateZip(State): - def __init__(self, path, timestamp): # pylint: disable=W0235 - super(StateZip,self).__init__(path, timestamp) + def __init__(self, path, timestamp): # pylint: disable=W0235 + super(StateZip, self).__init__(path, timestamp) def delete(self): os.remove(self._path) class StateDirectory(State): - def __init__(self, path, timestamp): # pylint: disable=W0235 + def __init__(self, path, timestamp): # pylint: disable=W0235 super(StateDirectory, self).__init__(path, timestamp) def delete(self): @@ -164,10 +163,10 @@ def delete(self): def archive(self): fn_tmp = "{0}.zip.tmp".format(self._path) - fn = "{0}.zip".format(self._path) # pylint: disable=C0103 + fn = "{0}.zip".format(self._path) # pylint: disable=C0103 ziph = zipfile.ZipFile(fn_tmp, 'w') - for f in os.listdir(self._path): # pylint: disable=C0103 + for f in os.listdir(self._path): # pylint: disable=C0103 full_path = os.path.join(self._path, f) ziph.write(full_path, f, zipfile.ZIP_DEFLATED) @@ -184,7 +183,7 @@ def __init__(self, lib_dir): if not os.path.isdir(self._source): try: fileutil.mkdir(self._source, mode=0o700) - except IOError as e: # pylint: disable=C0103 + except IOError as e: # pylint: disable=C0103 if e.errno != errno.EEXIST: logger.error("{0} : {1}", self._source, e.strerror) @@ -207,13 +206,13 @@ def archive(self): def _get_archive_states(self): states = [] - for f in os.listdir(self._source): # pylint: disable=C0103 + for f in os.listdir(self._source): # pylint: disable=C0103 full_path = os.path.join(self._source, f) - m = ARCHIVE_PATTERNS_DIRECTORY.match(f) # pylint: disable=C0103 + m = ARCHIVE_PATTERNS_DIRECTORY.match(f) # pylint: disable=C0103 if m is not None: states.append(StateDirectory(full_path, m.group(0))) - m = ARCHIVE_PATTERNS_ZIP.match(f) # pylint: disable=C0103 + m = ARCHIVE_PATTERNS_ZIP.match(f) # pylint: disable=C0103 if m is not None: states.append(StateZip(full_path, m.group(0))) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index bd80a7411e..b8ecae9746 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -24,7 +24,10 @@ import time import unittest import uuid +from datetime import datetime, timedelta +import azurelinuxagent.common.conf as conf +from azurelinuxagent.common.exception import IncompleteGoalStateError from azurelinuxagent.common.exception import InvalidContainerError, ResourceGoneError, ProtocolError, \ ExtensionDownloadError, HttpError from azurelinuxagent.common.protocol.goal_state import ExtensionsConfig, GoalState @@ -35,7 +38,6 @@ from azurelinuxagent.common.telemetryevent import TelemetryEventList, GuestAgentExtensionEventsSchema, \ TelemetryEventParam, TelemetryEvent from azurelinuxagent.common.utils import restutil -from azurelinuxagent.common.exception import IncompleteGoalStateError from azurelinuxagent.common.version import CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION from tests.ga.test_monitor import random_generator from tests.protocol import mockwiredata @@ -1054,8 +1056,42 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().container_id, new_container_id) self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) + + def test_update_goal_state_should_archive_last_goal_state(self): + with patch("azurelinuxagent.common.protocol.wire.datetime") as patch_datetime: + first_gs_timestamp = datetime.utcnow() + timedelta(minutes=5) + second_gs_timestamp = datetime.utcnow() + timedelta(minutes=10) + third_gs_timestamp = datetime.utcnow() + timedelta(minutes=15) + patch_datetime.utcnow.side_effect = [first_gs_timestamp, second_gs_timestamp, third_gs_timestamp] + + # The first goal state is created when we instantiate the protocol + with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: + history_dir = os.path.join(conf.get_lib_dir(), "history") + archives = os.listdir(history_dir) + self.assertEqual(len(archives), 0, "The goal state archive should have been empty since this is the first goal state") + + # Create the second new goal state, so the initial one should be archived + protocol.mock_wire_data.set_incarnation("2") + protocol.client.update_goal_state() + + # The initial goal state should be in the archive + archives = os.listdir(history_dir) + self.assertEqual(len(archives), 1, "Only one goal state should have been archived") + self.assertEqual(archives[0], first_gs_timestamp.isoformat(), "The name of goal state archive should match the first goal state timestamp") + + # Create the third goal state, so the second one should be archived too + protocol.mock_wire_data.set_incarnation("3") + protocol.client.update_goal_state() + + # The second goal state should be in the archive + archives = os.listdir(history_dir) + archives.sort() + self.assertEqual(len(archives), 2, "Two goal states should have been archived") + self.assertEqual(archives[1], second_gs_timestamp.isoformat(), "The name of goal state archive should match the second goal state timestamp") + # pylint: enable=too-many-public-methods + class TryUpdateGoalStateTestCase(HttpRequestPredicates, AgentTestCase): """ Tests for WireClient.try_update_goal_state() From 0cb2a12e847d90d92b5962a919d221dbf0e43a96 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Fri, 23 Oct 2020 14:52:43 -0700 Subject: [PATCH 2/8] move timestamp to goal state instead of protocol --- azurelinuxagent/common/protocol/goal_state.py | 2 ++ azurelinuxagent/common/protocol/wire.py | 17 ++++------------- tests/protocol/test_wire.py | 2 +- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 242ee929cd..12caeda589 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -20,6 +20,7 @@ import os import re import time +from datetime import datetime import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger @@ -141,6 +142,7 @@ def __init__(self, wire_client, full_goal_state=False, base_incarnation=None): raise ProtocolError(msg="Error fetching goal state", inner=exception) finally: logger.info('Fetch goal state completed') + self.fetch_timestamp = datetime.utcnow() @staticmethod def fetch_goal_state(wire_client): diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index dbb801f428..2e3e730126 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -541,8 +541,6 @@ def __init__(self, endpoint): self._endpoint = endpoint self._goal_state = None self._last_try_update_goal_state_failed = False - self._previous_goal_state_timestamp = None - self._current_goal_state_timestamp = None self._host_plugin = None self.status_blob = StatusBlob(self) self.goal_state_flusher = StateFlusher(conf.get_lib_dir()) @@ -722,8 +720,9 @@ def update_goal_state(self, forced=False): new_goal_state = GoalState.fetch_full_goal_state_if_incarnation_different_than(self, self._goal_state.incarnation) if new_goal_state is not None: + previous_goal_state_fetch_timestamp = self._goal_state.fetch_timestamp if self._goal_state else datetime.utcnow() self._goal_state = new_goal_state - self._save_goal_state() + self._save_goal_state(previous_goal_state_fetch_timestamp) self._update_host_plugin(new_goal_state.container_id, new_goal_state.role_config_name) except Exception as exception: @@ -758,19 +757,11 @@ def _update_host_plugin(self, container_id, role_config_name): self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def _save_goal_state(self): - self._current_goal_state_timestamp = datetime.utcnow() - - # If this is the first goal state, the timestamp is current - if not self._previous_goal_state_timestamp: - self._previous_goal_state_timestamp = self._current_goal_state_timestamp - + def _save_goal_state(self, previous_goal_state_fetch_timestamp): try: - self.goal_state_flusher.flush(self._previous_goal_state_timestamp) + self.goal_state_flusher.flush(previous_goal_state_fetch_timestamp) except Exception as e: # pylint: disable=C0103 logger.warn("Failed to save the previous goal state to the history folder: {0}", ustr(e)) - finally: - self._previous_goal_state_timestamp = self._current_goal_state_timestamp try: local_file = os.path.join(conf.get_lib_dir(), INCARNATION_FILE_NAME) diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index b8ecae9746..2dfb4d2ab4 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1058,7 +1058,7 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) def test_update_goal_state_should_archive_last_goal_state(self): - with patch("azurelinuxagent.common.protocol.wire.datetime") as patch_datetime: + with patch("azurelinuxagent.common.protocol.goal_state.datetime") as patch_datetime: first_gs_timestamp = datetime.utcnow() + timedelta(minutes=5) second_gs_timestamp = datetime.utcnow() + timedelta(minutes=10) third_gs_timestamp = datetime.utcnow() + timedelta(minutes=15) From 6b9cac30ca8c46b051afe2403981ac6181497268 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Tue, 27 Oct 2020 18:44:20 -0700 Subject: [PATCH 3/8] use GoalState file timestamp instead of in-memory property --- azurelinuxagent/common/protocol/goal_state.py | 4 +- azurelinuxagent/common/protocol/wire.py | 8 +- azurelinuxagent/common/utils/archive.py | 97 +++++++++++-------- tests/protocol/test_wire.py | 19 ++-- tests/utils/test_archive.py | 58 +---------- 5 files changed, 78 insertions(+), 108 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 12caeda589..ca91411ecb 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -20,19 +20,18 @@ import os import re import time -from datetime import datetime import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger from azurelinuxagent.common.AgentGlobals import AgentGlobals from azurelinuxagent.common.datacontract import set_properties from azurelinuxagent.common.event import add_event, WALAEventOperation +from azurelinuxagent.common.exception import IncompleteGoalStateError from azurelinuxagent.common.exception import ProtocolError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.restapi import Cert, CertList, Extension, ExtHandler, ExtHandlerList, \ ExtHandlerVersionUri, RemoteAccessUser, RemoteAccessUsersList, \ VMAgentManifest, VMAgentManifestList, VMAgentManifestUri -from azurelinuxagent.common.exception import IncompleteGoalStateError from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.cryptutil import CryptUtil from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib, gettext @@ -142,7 +141,6 @@ def __init__(self, wire_client, full_goal_state=False, base_incarnation=None): raise ProtocolError(msg="Error fetching goal state", inner=exception) finally: logger.info('Fetch goal state completed') - self.fetch_timestamp = datetime.utcnow() @staticmethod def fetch_goal_state(wire_client): diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 2e3e730126..b788feb9b4 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -22,7 +22,6 @@ import time import traceback import xml.sax.saxutils as saxutils -from datetime import datetime # pylint: disable=ungrouped-imports import azurelinuxagent.common.conf as conf import azurelinuxagent.common.logger as logger @@ -720,9 +719,8 @@ def update_goal_state(self, forced=False): new_goal_state = GoalState.fetch_full_goal_state_if_incarnation_different_than(self, self._goal_state.incarnation) if new_goal_state is not None: - previous_goal_state_fetch_timestamp = self._goal_state.fetch_timestamp if self._goal_state else datetime.utcnow() self._goal_state = new_goal_state - self._save_goal_state(previous_goal_state_fetch_timestamp) + self._save_goal_state() self._update_host_plugin(new_goal_state.container_id, new_goal_state.role_config_name) except Exception as exception: @@ -757,9 +755,9 @@ def _update_host_plugin(self, container_id, role_config_name): self._host_plugin.update_container_id(container_id) self._host_plugin.update_role_config_name(role_config_name) - def _save_goal_state(self, previous_goal_state_fetch_timestamp): + def _save_goal_state(self): try: - self.goal_state_flusher.flush(previous_goal_state_fetch_timestamp) + self.goal_state_flusher.flush() except Exception as e: # pylint: disable=C0103 logger.warn("Failed to save the previous goal state to the history folder: {0}", ustr(e)) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 96ecad4c1b..5507d23119 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -5,12 +5,13 @@ import re import shutil import zipfile - -from azurelinuxagent.common.utils import fileutil +from datetime import datetime import azurelinuxagent.common.logger as logger +from azurelinuxagent.common.utils import fileutil # pylint: disable=W0105 + """ archive.py @@ -40,71 +41,89 @@ MAX_ARCHIVED_STATES = 50 CACHE_PATTERNS = [ - re.compile("^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), # pylint: disable=W1401 - re.compile("^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) # pylint: disable=W1401 + re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), + re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), + re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) ] +GOAL_STATE_PATTERN = re.compile(r"^(.*)GoalState\.(\d+)\.xml$", re.IGNORECASE) + # 2018-04-06T08:21:37.142697 # 2018-04-06T08:21:37.142697.zip -ARCHIVE_PATTERNS_DIRECTORY = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$') # pylint: disable=W1401 -ARCHIVE_PATTERNS_ZIP = re.compile('^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$') # pylint: disable=W1401 +ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$") +ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$") class StateFlusher(object): def __init__(self, lib_dir): self._source = lib_dir - d = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) # pylint: disable=C0103 - if not os.path.exists(d): + directory = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) + if not os.path.exists(directory): try: - fileutil.mkdir(d) + fileutil.mkdir(directory) except OSError as e: # pylint: disable=C0103 if e.errno != errno.EEXIST: logger.error("{0} : {1}", self._source, e.strerror) - def flush(self, timestamp): + def flush(self): files = self._get_files_to_archive() if len(files) == 0: # pylint: disable=len-as-condition return - if self._mkdir(timestamp): - self._archive(files, timestamp) + goal_state_timestamp = self._get_latest_timestamp(files) + if goal_state_timestamp and self._mkdir(goal_state_timestamp): + self._archive(files, goal_state_timestamp) else: self._purge(files) def history_dir(self, timestamp): return os.path.join(self._source, ARCHIVE_DIRECTORY_NAME, timestamp.isoformat()) + @staticmethod + def _get_latest_timestamp(files): + # Get the most recently modified GoalState.*.xml (if there are more than one) and use that timestamp for the archive name. + latest_timestamp_ms = None + for current_file in files: + match = GOAL_STATE_PATTERN.match(current_file) + if not match: + continue + + creation_time_ms = os.path.getmtime(current_file) + if not latest_timestamp_ms or latest_timestamp_ms < creation_time_ms: + latest_timestamp_ms = creation_time_ms + + return datetime.utcfromtimestamp(latest_timestamp_ms) + def _get_files_to_archive(self): files = [] - for f in os.listdir(self._source): # pylint: disable=C0103 - full_path = os.path.join(self._source, f) + for current_file in os.listdir(self._source): + full_path = os.path.join(self._source, current_file) for pattern in CACHE_PATTERNS: - m = pattern.match(f) # pylint: disable=C0103 - if m is not None: + match = pattern.match(current_file) + if match is not None: files.append(full_path) break return files def _archive(self, files, timestamp): - for f in files: # pylint: disable=C0103 - dst = os.path.join(self.history_dir(timestamp), os.path.basename(f)) - shutil.move(f, dst) + for current_file in files: + dst = os.path.join(self.history_dir(timestamp), os.path.basename(current_file)) + shutil.move(current_file, dst) def _purge(self, files): - for f in files: # pylint: disable=C0103 - os.remove(f) + for current_file in files: + os.remove(current_file) def _mkdir(self, timestamp): - d = self.history_dir(timestamp) # pylint: disable=C0103 + directory = self.history_dir(timestamp) try: - fileutil.mkdir(d, mode=0o700) + fileutil.mkdir(directory, mode=0o700) return True except IOError as e: # pylint: disable=C0103 - logger.error("{0} : {1}".format(d, e.strerror)) + logger.error("{0} : {1}".format(directory, e.strerror)) return False @@ -163,16 +182,16 @@ def delete(self): def archive(self): fn_tmp = "{0}.zip.tmp".format(self._path) - fn = "{0}.zip".format(self._path) # pylint: disable=C0103 + filename = "{0}.zip".format(self._path) ziph = zipfile.ZipFile(fn_tmp, 'w') - for f in os.listdir(self._path): # pylint: disable=C0103 - full_path = os.path.join(self._path, f) - ziph.write(full_path, f, zipfile.ZIP_DEFLATED) + for current_file in os.listdir(self._path): + full_path = os.path.join(self._path, current_file) + ziph.write(full_path, current_file, zipfile.ZIP_DEFLATED) ziph.close() - os.rename(fn_tmp, fn) + os.rename(fn_tmp, filename) shutil.rmtree(self._path) @@ -206,14 +225,14 @@ def archive(self): def _get_archive_states(self): states = [] - for f in os.listdir(self._source): # pylint: disable=C0103 - full_path = os.path.join(self._source, f) - m = ARCHIVE_PATTERNS_DIRECTORY.match(f) # pylint: disable=C0103 - if m is not None: - states.append(StateDirectory(full_path, m.group(0))) - - m = ARCHIVE_PATTERNS_ZIP.match(f) # pylint: disable=C0103 - if m is not None: - states.append(StateZip(full_path, m.group(0))) + for current_file in os.listdir(self._source): + full_path = os.path.join(self._source, current_file) + match = ARCHIVE_PATTERNS_DIRECTORY.match(current_file) + if match is not None: + states.append(StateDirectory(full_path, match.group(0))) + + match = ARCHIVE_PATTERNS_ZIP.match(current_file) + if match is not None: + states.append(StateZip(full_path, match.group(0))) return states diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 09a27d0b53..7a9589395b 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1057,11 +1057,14 @@ def test_forced_update_should_update_the_goal_state_and_the_host_plugin_when_the self.assertEqual(protocol.client.get_host_plugin().role_config_name, new_role_config_name) def test_update_goal_state_should_archive_last_goal_state(self): - with patch("azurelinuxagent.common.protocol.goal_state.datetime") as patch_datetime: - first_gs_timestamp = datetime.utcnow() + timedelta(minutes=5) - second_gs_timestamp = datetime.utcnow() + timedelta(minutes=10) - third_gs_timestamp = datetime.utcnow() + timedelta(minutes=15) - patch_datetime.utcnow.side_effect = [first_gs_timestamp, second_gs_timestamp, third_gs_timestamp] + # We use the last modified timestamp of the goal state to be archived to determine the archive's name. + mock_mtime = os.path.getmtime(self.tmp_dir) + with patch("azurelinuxagent.common.utils.archive.os.path.getmtime") as patch_mtime: + first_gs_ms = mock_mtime + timedelta(minutes=5).seconds + second_gs_ms = mock_mtime + timedelta(minutes=10).seconds + third_gs_ms = mock_mtime + timedelta(minutes=15).seconds + + patch_mtime.side_effect = [first_gs_ms, second_gs_ms, third_gs_ms] # The first goal state is created when we instantiate the protocol with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol: @@ -1074,19 +1077,21 @@ def test_update_goal_state_should_archive_last_goal_state(self): protocol.client.update_goal_state() # The initial goal state should be in the archive + first_archive_name = datetime.utcfromtimestamp(first_gs_ms).isoformat() archives = os.listdir(history_dir) self.assertEqual(len(archives), 1, "Only one goal state should have been archived") - self.assertEqual(archives[0], first_gs_timestamp.isoformat(), "The name of goal state archive should match the first goal state timestamp") + self.assertEqual(archives[0], first_archive_name, "The name of goal state archive should match the first goal state timestamp") # Create the third goal state, so the second one should be archived too protocol.mock_wire_data.set_incarnation("3") protocol.client.update_goal_state() # The second goal state should be in the archive + second_archive_name = datetime.utcfromtimestamp(second_gs_ms).isoformat() archives = os.listdir(history_dir) archives.sort() self.assertEqual(len(archives), 2, "Two goal states should have been archived") - self.assertEqual(archives[1], second_gs_timestamp.isoformat(), "The name of goal state archive should match the second goal state timestamp") + self.assertEqual(archives[1], second_archive_name, "The name of goal state archive should match the second goal state timestamp") # pylint: enable=too-many-public-methods diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 40e9f103b9..ffe5c3d389 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -51,6 +51,7 @@ def test_archive00(self): under the history folder that is timestamped. """ temp_files = [ + 'GoalState.0.xml', 'Prod.0.manifest.xml', 'Prod.0.agentsManifest', 'Microsoft.Azure.Extensions.CustomScript.0.xml' @@ -60,7 +61,7 @@ def test_archive00(self): self._write_file(f) test_subject = StateFlusher(self.tmp_dir) - test_subject.flush(datetime.utcnow()) + test_subject.flush() self.assertTrue(os.path.exists(self.history_dir)) self.assertTrue(os.path.isdir(self.history_dir)) @@ -86,6 +87,7 @@ def test_archive01(self): 2. Deleting the timestamped directory """ temp_files = [ + 'GoalState.0.xml', 'Prod.0.manifest.xml', 'Prod.0.agentsManifest', 'Microsoft.Azure.Extensions.CustomScript.0.xml' @@ -95,7 +97,7 @@ def test_archive01(self): self._write_file(f) flusher = StateFlusher(self.tmp_dir) - flusher.flush(datetime.utcnow()) + flusher.flush() test_subject = StateArchiver(self.tmp_dir) test_subject.archive() @@ -158,58 +160,6 @@ def test_archive02(self): fn = "{0}.zip".format(ts) # pylint: disable=invalid-name self.assertTrue(fn in archived_entries, "'{0}' is not in the list of unpurged entires".format(fn)) - def test_archive03(self): - """ - If the StateFlusher has to flush the same file, it should - overwrite the existing one. - """ - temp_files = [ - 'Prod.0.manifest.xml', - 'Prod.0.agentsManifest', - 'Microsoft.Azure.Extensions.CustomScript.0.xml' - ] - - def _write_goal_state_files(temp_files, content=None): - for f in temp_files: # pylint: disable=invalid-name - self._write_file(f, content) - - def _check_history_files(timestamp_dir, files, content=None): - for f in files: # pylint: disable=invalid-name - history_path = os.path.join(self.history_dir, timestamp_dir, f) - msg = "expected the temp file {0} to exist".format(history_path) - self.assertTrue(os.path.exists(history_path), msg) - expected_content = f if content is None else content - actual_content = fileutil.read_file(history_path) - self.assertEqual(expected_content, actual_content) - - timestamp = datetime.utcnow() - - _write_goal_state_files(temp_files) - test_subject = StateFlusher(self.tmp_dir) - test_subject.flush(timestamp) - - # Ensure history directory exists, has proper timestamped-based name, - self.assertTrue(os.path.exists(self.history_dir)) - self.assertTrue(os.path.isdir(self.history_dir)) - - timestamp_dirs = os.listdir(self.history_dir) - self.assertEqual(1, len(timestamp_dirs)) - - self.assertIsIso8601(timestamp_dirs[0]) - ts = self.parse_isoformat(timestamp_dirs[0]) # pylint: disable=invalid-name - self.assertDateTimeCloseTo(ts, datetime.utcnow(), timedelta(seconds=30)) - - # Ensure saved files contain the right content - _check_history_files(timestamp_dirs[0], temp_files) - - # re-write all of the same files with different content, and flush again. - # .flush() should overwrite the existing ones - _write_goal_state_files(temp_files, "--this-has-been-changed--") - test_subject.flush(timestamp) - - # The contents of the saved files were overwritten as a result of the flush. - _check_history_files(timestamp_dirs[0], temp_files, "--this-has-been-changed--") - def test_archive04(self): """ The archive directory is created if it does not exist. From 919d2487fde390c1c2f4221166f4e9f119fe8acc Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Wed, 28 Oct 2020 15:35:53 -0700 Subject: [PATCH 4/8] fix pylint warnings --- azurelinuxagent/common/utils/archive.py | 58 +++++++++++-------------- tests/utils/test_archive.py | 8 ++-- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 5507d23119..8f33122a9d 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -36,39 +36,39 @@ """ # pylint: enable=W0105 -ARCHIVE_DIRECTORY_NAME = 'history' +_ARCHIVE_DIRECTORY_NAME = 'history' -MAX_ARCHIVED_STATES = 50 +_MAX_ARCHIVED_STATES = 50 -CACHE_PATTERNS = [ +_CACHE_PATTERNS = [ re.compile(r"^(.*)\.(\d+)\.(agentsManifest)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(manifest\.xml)$", re.IGNORECASE), re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) ] -GOAL_STATE_PATTERN = re.compile(r"^(.*)GoalState\.(\d+)\.xml$", re.IGNORECASE) +_GOAL_STATE_PATTERN = re.compile(r"^(.*)GoalState\.(\d+)\.xml$", re.IGNORECASE) # 2018-04-06T08:21:37.142697 # 2018-04-06T08:21:37.142697.zip -ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$") -ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$") +_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$") +_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$") class StateFlusher(object): def __init__(self, lib_dir): self._source = lib_dir - directory = os.path.join(self._source, ARCHIVE_DIRECTORY_NAME) + directory = os.path.join(self._source, _ARCHIVE_DIRECTORY_NAME) if not os.path.exists(directory): try: fileutil.mkdir(directory) - except OSError as e: # pylint: disable=C0103 - if e.errno != errno.EEXIST: - logger.error("{0} : {1}", self._source, e.strerror) + except OSError as exception: + if exception.errno != errno.EEXIST: + logger.error("{0} : {1}", self._source, exception.strerror) def flush(self): files = self._get_files_to_archive() - if len(files) == 0: # pylint: disable=len-as-condition + if not files: return goal_state_timestamp = self._get_latest_timestamp(files) @@ -78,20 +78,20 @@ def flush(self): self._purge(files) def history_dir(self, timestamp): - return os.path.join(self._source, ARCHIVE_DIRECTORY_NAME, timestamp.isoformat()) + return os.path.join(self._source, _ARCHIVE_DIRECTORY_NAME, timestamp.isoformat()) @staticmethod def _get_latest_timestamp(files): # Get the most recently modified GoalState.*.xml (if there are more than one) and use that timestamp for the archive name. latest_timestamp_ms = None for current_file in files: - match = GOAL_STATE_PATTERN.match(current_file) + match = _GOAL_STATE_PATTERN.match(current_file) if not match: continue - creation_time_ms = os.path.getmtime(current_file) - if not latest_timestamp_ms or latest_timestamp_ms < creation_time_ms: - latest_timestamp_ms = creation_time_ms + modification_time_ms = os.path.getmtime(current_file) + if not latest_timestamp_ms or latest_timestamp_ms < modification_time_ms: + latest_timestamp_ms = modification_time_ms return datetime.utcfromtimestamp(latest_timestamp_ms) @@ -99,7 +99,7 @@ def _get_files_to_archive(self): files = [] for current_file in os.listdir(self._source): full_path = os.path.join(self._source, current_file) - for pattern in CACHE_PATTERNS: + for pattern in _CACHE_PATTERNS: match = pattern.match(current_file) if match is not None: files.append(full_path) @@ -122,8 +122,8 @@ def _mkdir(self, timestamp): try: fileutil.mkdir(directory, mode=0o700) return True - except IOError as e: # pylint: disable=C0103 - logger.error("{0} : {1}".format(directory, e.strerror)) + except IOError as exception: + logger.error("{0} : {1}".format(directory, exception.strerror)) return False @@ -166,17 +166,11 @@ def __ge__(self, other): class StateZip(State): - def __init__(self, path, timestamp): # pylint: disable=W0235 - super(StateZip, self).__init__(path, timestamp) - def delete(self): os.remove(self._path) class StateDirectory(State): - def __init__(self, path, timestamp): # pylint: disable=W0235 - super(StateDirectory, self).__init__(path, timestamp) - def delete(self): shutil.rmtree(self._path) @@ -197,14 +191,14 @@ def archive(self): class StateArchiver(object): def __init__(self, lib_dir): - self._source = os.path.join(lib_dir, ARCHIVE_DIRECTORY_NAME) + self._source = os.path.join(lib_dir, _ARCHIVE_DIRECTORY_NAME) if not os.path.isdir(self._source): try: fileutil.mkdir(self._source, mode=0o700) - except IOError as e: # pylint: disable=C0103 - if e.errno != errno.EEXIST: - logger.error("{0} : {1}", self._source, e.strerror) + except IOError as exception: + if exception.errno != errno.EEXIST: + logger.error("{0} : {1}", self._source, exception.strerror) def purge(self): """ @@ -215,7 +209,7 @@ def purge(self): states = self._get_archive_states() states.sort(reverse=True) - for state in states[MAX_ARCHIVED_STATES:]: + for state in states[_MAX_ARCHIVED_STATES:]: state.delete() def archive(self): @@ -227,11 +221,11 @@ def _get_archive_states(self): states = [] for current_file in os.listdir(self._source): full_path = os.path.join(self._source, current_file) - match = ARCHIVE_PATTERNS_DIRECTORY.match(current_file) + match = _ARCHIVE_PATTERNS_DIRECTORY.match(current_file) if match is not None: states.append(StateDirectory(full_path, match.group(0))) - match = ARCHIVE_PATTERNS_ZIP.match(current_file) + match = _ARCHIVE_PATTERNS_ZIP.match(current_file) if match is not None: states.append(StateZip(full_path, match.group(0))) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index ffe5c3d389..1f7f83ea35 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -9,7 +9,7 @@ import azurelinuxagent.common.logger as logger from azurelinuxagent.common.utils import fileutil -from azurelinuxagent.common.utils.archive import StateFlusher, StateArchiver, MAX_ARCHIVED_STATES +from azurelinuxagent.common.utils.archive import StateFlusher, StateArchiver, _MAX_ARCHIVED_STATES from tests.tools import AgentTestCase debug = False # pylint: disable=invalid-name @@ -126,7 +126,7 @@ def test_archive02(self): directories are properly deleted from the disk. """ count = 6 - total = MAX_ARCHIVED_STATES + count + total = _MAX_ARCHIVED_STATES + count start = datetime.now() timestamps = [] @@ -148,11 +148,11 @@ def test_archive02(self): test_subject.purge() archived_entries = os.listdir(self.history_dir) - self.assertEqual(MAX_ARCHIVED_STATES, len(archived_entries)) + self.assertEqual(_MAX_ARCHIVED_STATES, len(archived_entries)) archived_entries.sort() - for i in range(0, MAX_ARCHIVED_STATES): + for i in range(0, _MAX_ARCHIVED_STATES): ts = timestamps[i + count].isoformat() # pylint: disable=invalid-name if i % 2 == 0: fn = ts # pylint: disable=invalid-name From 96d3ba62dc010c0e9d0c52a85b595421af390e02 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Thu, 29 Oct 2020 11:27:15 -0700 Subject: [PATCH 5/8] address comments --- azurelinuxagent/common/utils/archive.py | 30 ++++++++++++++++--------- tests/utils/test_archive.py | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index 5e3769361b..e59de9909e 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -46,7 +46,7 @@ re.compile(r"^(.*)\.(\d+)\.(xml)$", re.IGNORECASE) ] -_GOAL_STATE_PATTERN = re.compile(r"^(.*)GoalState\.(\d+)\.xml$", re.IGNORECASE) +_GOAL_STATE_PATTERN = re.compile(r"^(.*)/GoalState\.(\d+)\.xml$", re.IGNORECASE) # 2018-04-06T08:21:37.142697 # 2018-04-06T08:21:37.142697.zip @@ -71,18 +71,23 @@ def flush(self): if not files: return - goal_state_timestamp = self._get_latest_timestamp(files) - if goal_state_timestamp and self._mkdir(goal_state_timestamp): - self._archive(files, goal_state_timestamp) + archive_name = self._get_latest_timestamp(files) + if archive_name and self._mkdir(archive_name): + self._archive(files, archive_name) else: self._purge(files) - def history_dir(self, timestamp): - return os.path.join(self._source, _ARCHIVE_DIRECTORY_NAME, timestamp.isoformat()) + def history_dir(self, name): + return os.path.join(self._source, _ARCHIVE_DIRECTORY_NAME, name) @staticmethod def _get_latest_timestamp(files): - # Get the most recently modified GoalState.*.xml (if there are more than one) and use that timestamp for the archive name. + """ + Gets the most recently modified GoalState.*.xml and uses that timestamp for the archive name. + In a normal workflow, we expect there to be only one GoalState.*.xml at a time, but if the previous one + wasn't purged for whatever reason, we take the most recently modified goal state file. + If there are no GoalState.*.xml files, we return None. + """ latest_timestamp_ms = None for current_file in files: match = _GOAL_STATE_PATTERN.match(current_file) @@ -90,10 +95,13 @@ def _get_latest_timestamp(files): continue modification_time_ms = os.path.getmtime(current_file) - if not latest_timestamp_ms or latest_timestamp_ms < modification_time_ms: + if latest_timestamp_ms is None or latest_timestamp_ms < modification_time_ms: latest_timestamp_ms = modification_time_ms - return datetime.utcfromtimestamp(latest_timestamp_ms) + if latest_timestamp_ms is not None: + return datetime.utcfromtimestamp(latest_timestamp_ms).isoformat() + else: + return None def _get_files_to_archive(self): files = [] @@ -116,8 +124,8 @@ def _purge(self, files): for current_file in files: os.remove(current_file) - def _mkdir(self, timestamp): - directory = self.history_dir(timestamp) + def _mkdir(self, name): + directory = self.history_dir(name) try: fileutil.mkdir(directory, mode=0o700) diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 1f7f83ea35..0b38c3eb65 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -160,7 +160,7 @@ def test_archive02(self): fn = "{0}.zip".format(ts) # pylint: disable=invalid-name self.assertTrue(fn in archived_entries, "'{0}' is not in the list of unpurged entires".format(fn)) - def test_archive04(self): + def test_archive03(self): """ The archive directory is created if it does not exist. From 662450357f2c9325d73c4267a7065e6b0fff7cb0 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Thu, 29 Oct 2020 16:44:20 -0700 Subject: [PATCH 6/8] adress comments and add incarnation to archive name --- azurelinuxagent/common/utils/archive.py | 24 +++--- tests/protocol/test_wire.py | 8 +- tests/utils/test_archive.py | 103 ++++++++++++++---------- 3 files changed, 76 insertions(+), 59 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index e59de9909e..c4c83e2f9c 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -48,10 +48,10 @@ _GOAL_STATE_PATTERN = re.compile(r"^(.*)/GoalState\.(\d+)\.xml$", re.IGNORECASE) -# 2018-04-06T08:21:37.142697 -# 2018-04-06T08:21:37.142697.zip -_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+$") -_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+\.zip$") +# 2018-04-06T08:21:37.142697_incarnation_N +# 2018-04-06T08:21:37.142697_incarnation_N.zip +_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+_incarnation_(\d+)$") +_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+_incarnation_(\d+)\.zip$") class StateFlusher(object): @@ -71,7 +71,7 @@ def flush(self): if not files: return - archive_name = self._get_latest_timestamp(files) + archive_name = self._get_archive_name(files) if archive_name and self._mkdir(archive_name): self._archive(files, archive_name) else: @@ -81,14 +81,16 @@ def history_dir(self, name): return os.path.join(self._source, _ARCHIVE_DIRECTORY_NAME, name) @staticmethod - def _get_latest_timestamp(files): + def _get_archive_name(files): """ - Gets the most recently modified GoalState.*.xml and uses that timestamp for the archive name. + Gets the most recently modified GoalState.*.xml and uses that timestamp and incarnation for the archive name. In a normal workflow, we expect there to be only one GoalState.*.xml at a time, but if the previous one wasn't purged for whatever reason, we take the most recently modified goal state file. If there are no GoalState.*.xml files, we return None. """ latest_timestamp_ms = None + incarnation = None + for current_file in files: match = _GOAL_STATE_PATTERN.match(current_file) if not match: @@ -97,11 +99,11 @@ def _get_latest_timestamp(files): modification_time_ms = os.path.getmtime(current_file) if latest_timestamp_ms is None or latest_timestamp_ms < modification_time_ms: latest_timestamp_ms = modification_time_ms + incarnation = match.groups()[1] - if latest_timestamp_ms is not None: - return datetime.utcfromtimestamp(latest_timestamp_ms).isoformat() - else: - return None + if latest_timestamp_ms is not None and incarnation is not None: + return datetime.utcfromtimestamp(latest_timestamp_ms).isoformat() + "_incarnation_{0}".format(incarnation) + return None def _get_files_to_archive(self): files = [] diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 9a438669e9..3918c68fd9 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -1077,21 +1077,21 @@ def test_update_goal_state_should_archive_last_goal_state(self): protocol.client.update_goal_state() # The initial goal state should be in the archive - first_archive_name = datetime.utcfromtimestamp(first_gs_ms).isoformat() + first_archive_name = datetime.utcfromtimestamp(first_gs_ms).isoformat() + "_incarnation_1" archives = os.listdir(history_dir) self.assertEqual(len(archives), 1, "Only one goal state should have been archived") - self.assertEqual(archives[0], first_archive_name, "The name of goal state archive should match the first goal state timestamp") + self.assertEqual(archives[0], first_archive_name, "The name of goal state archive should match the first goal state timestamp and incarnation") # Create the third goal state, so the second one should be archived too protocol.mock_wire_data.set_incarnation("3") protocol.client.update_goal_state() # The second goal state should be in the archive - second_archive_name = datetime.utcfromtimestamp(second_gs_ms).isoformat() + second_archive_name = datetime.utcfromtimestamp(second_gs_ms).isoformat() + "_incarnation_2" archives = os.listdir(history_dir) archives.sort() self.assertEqual(len(archives), 2, "Two goal states should have been archived") - self.assertEqual(archives[1], second_archive_name, "The name of goal state archive should match the second goal state timestamp") + self.assertEqual(archives[1], second_archive_name, "The name of goal state archive should match the second goal state timestamp and incarnation") # pylint: enable=too-many-public-methods diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index 0b38c3eb65..f128e7da65 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -32,19 +32,28 @@ def tearDown(self): if not debug and self.tmp_dir is not None: shutil.rmtree(self.tmp_dir) - def _write_file(self, fn, contents=None): # pylint: disable=invalid-name - full_name = os.path.join(self.tmp_dir, fn) + def _write_file(self, filename, contents=None): + full_name = os.path.join(self.tmp_dir, filename) fileutil.mkdir(os.path.dirname(full_name)) - with open(full_name, 'w') as fh: # pylint: disable=invalid-name - data = contents if contents is not None else fn - fh.write(data) + with open(full_name, 'w') as file_handler: + data = contents if contents is not None else filename + file_handler.write(data) return full_name @property def history_dir(self): return os.path.join(self.tmp_dir, 'history') + @staticmethod + def _parse_archive_name(name): + # Name can be a directory or a zip + # '0000-00-00T00:00:00.000000_incarnation_0' + # '0000-00-00T00:00:00.000000_incarnation_0.zip' + timestamp_str, _, incarnation_ext = name.split("_") + incarnation_no_ext = os.path.splitext(incarnation_ext)[0] + return timestamp_str, incarnation_no_ext + def test_archive00(self): """ StateFlusher should move all 'goal state' files to a new directory @@ -57,8 +66,8 @@ def test_archive00(self): 'Microsoft.Azure.Extensions.CustomScript.0.xml' ] - for f in temp_files: # pylint: disable=invalid-name - self._write_file(f) + for temp_file in temp_files: + self._write_file(temp_file) test_subject = StateFlusher(self.tmp_dir) test_subject.flush() @@ -69,12 +78,14 @@ def test_archive00(self): timestamp_dirs = os.listdir(self.history_dir) self.assertEqual(1, len(timestamp_dirs)) - self.assertIsIso8601(timestamp_dirs[0]) - ts = self.parse_isoformat(timestamp_dirs[0]) # pylint: disable=invalid-name - self.assertDateTimeCloseTo(ts, datetime.utcnow(), timedelta(seconds=30)) + timestamp_str, incarnation = self._parse_archive_name(timestamp_dirs[0]) + self.assert_is_iso8601(timestamp_str) + timestamp = self.parse_isoformat(timestamp_str) + self.assert_datetime_close_to(timestamp, datetime.utcnow(), timedelta(seconds=30)) + self.assertEqual("0", incarnation) - for f in temp_files: # pylint: disable=invalid-name - history_path = os.path.join(self.history_dir, timestamp_dirs[0], f) + for temp_file in temp_files: + history_path = os.path.join(self.history_dir, timestamp_dirs[0], temp_file) msg = "expected the temp file {0} to exist".format(history_path) self.assertTrue(os.path.exists(history_path), msg) @@ -93,8 +104,8 @@ def test_archive01(self): 'Microsoft.Azure.Extensions.CustomScript.0.xml' ] - for f in temp_files: # pylint: disable=invalid-name - self._write_file(f) + for current_file in temp_files: + self._write_file(current_file) flusher = StateFlusher(self.tmp_dir) flusher.flush() @@ -105,15 +116,16 @@ def test_archive01(self): timestamp_zips = os.listdir(self.history_dir) self.assertEqual(1, len(timestamp_zips)) - zip_fn = timestamp_zips[0] # 2000-01-01T00:00:00.000000.zip - ts_s = os.path.splitext(zip_fn)[0] # 2000-01-01T00:00:00.000000 + zip_fn = timestamp_zips[0] # 2000-01-01T00:00:00.000000_incarnation_N.zip + timestamp_str, incarnation = self._parse_archive_name(zip_fn) - self.assertIsIso8601(ts_s) - ts = self.parse_isoformat(ts_s) # pylint: disable=invalid-name - self.assertDateTimeCloseTo(ts, datetime.utcnow(), timedelta(seconds=30)) + self.assert_is_iso8601(timestamp_str) + timestamp = self.parse_isoformat(timestamp_str) + self.assert_datetime_close_to(timestamp, datetime.utcnow(), timedelta(seconds=30)) + self.assertEqual("0", incarnation) zip_full = os.path.join(self.history_dir, zip_fn) - self.assertZipContains(zip_full, temp_files) + self.assert_zip_contains(zip_full, temp_files) def test_archive02(self): """ @@ -132,15 +144,15 @@ def test_archive02(self): timestamps = [] for i in range(0, total): - ts = start + timedelta(seconds=i) # pylint: disable=invalid-name - timestamps.append(ts) + timestamp = start + timedelta(seconds=i) + timestamps.append(timestamp) if i % 2 == 0: - fn = os.path.join('history', ts.isoformat(), 'Prod.0.manifest.xml') # pylint: disable=invalid-name + filename = os.path.join('history', "{0}_incarnation_0".format(timestamp.isoformat()), 'Prod.0.manifest.xml') else: - fn = os.path.join('history', "{0}.zip".format(ts.isoformat())) # pylint: disable=invalid-name + filename = os.path.join('history', "{0}_incarnation_0.zip".format(timestamp.isoformat())) - self._write_file(fn) + self._write_file(filename) self.assertEqual(total, len(os.listdir(self.history_dir))) @@ -153,12 +165,12 @@ def test_archive02(self): archived_entries.sort() for i in range(0, _MAX_ARCHIVED_STATES): - ts = timestamps[i + count].isoformat() # pylint: disable=invalid-name + timestamp = timestamps[i + count].isoformat() if i % 2 == 0: - fn = ts # pylint: disable=invalid-name + filename = "{0}_incarnation_0".format(timestamp) else: - fn = "{0}.zip".format(ts) # pylint: disable=invalid-name - self.assertTrue(fn in archived_entries, "'{0}' is not in the list of unpurged entires".format(fn)) + filename = "{0}_incarnation_0.zip".format(timestamp) + self.assertTrue(filename in archived_entries, "'{0}' is not in the list of unpurged entires".format(filename)) def test_archive03(self): """ @@ -169,35 +181,38 @@ def test_archive03(self): test_subject = StateArchiver(os.path.join(self.tmp_dir, 'does-not-exist')) test_subject.purge() - def parse_isoformat(self, s): # pylint: disable=invalid-name - return datetime.strptime(s, '%Y-%m-%dT%H:%M:%S.%f') + @staticmethod + def parse_isoformat(timestamp_str): + return datetime.strptime(timestamp_str, '%Y-%m-%dT%H:%M:%S.%f') - def assertIsIso8601(self, s): # pylint: disable=invalid-name + @staticmethod + def assert_is_iso8601(timestamp_str): try: - self.parse_isoformat(s) + TestArchive.parse_isoformat(timestamp_str) except: - raise AssertionError("the value '{0}' is not an ISO8601 formatted timestamp".format(s)) + raise AssertionError("the value '{0}' is not an ISO8601 formatted timestamp".format(timestamp_str)) - def _total_seconds(self, td): # pylint: disable=invalid-name + @staticmethod + def _total_seconds(delta): """ Compute the total_seconds for a timedelta because 2.6 does not have total_seconds. """ - return (0.0 + td.microseconds + (td.seconds + td.days * 24 * 60 * 60) * 10 ** 6) / 10 ** 6 + return (0.0 + delta.microseconds + (delta.seconds + delta.days * 24 * 60 * 60) * 10 ** 6) / 10 ** 6 - def assertDateTimeCloseTo(self, t1, t2, within): # pylint: disable=invalid-name - if t1 <= t2: - diff = t2 -t1 + def assert_datetime_close_to(self, time1, time2, within): + if time1 <= time2: + diff = time2 - time1 else: - diff = t1 - t2 + diff = time1 - time2 secs = self._total_seconds(within - diff) if secs < 0: self.fail("the timestamps are outside of the tolerance of by {0} seconds".format(secs)) - def assertZipContains(self, zip_fn, files): # pylint: disable=invalid-name - ziph = zipfile.ZipFile(zip_fn, 'r') + def assert_zip_contains(self, zip_filename, files): + ziph = zipfile.ZipFile(zip_filename, 'r') zip_files = [x.filename for x in ziph.filelist] - for f in files: # pylint: disable=invalid-name - self.assertTrue(f in zip_files, "'{0}' was not found in {1}".format(f, zip_fn)) + for current_file in files: + self.assertTrue(current_file in zip_files, "'{0}' was not found in {1}".format(current_file, zip_filename)) ziph.close() From af70b4f41bd16eef1889a10a51c65a11d8a02ac4 Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Thu, 29 Oct 2020 17:53:57 -0700 Subject: [PATCH 7/8] ensure both old and new naming is supported --- azurelinuxagent/common/utils/archive.py | 5 ++-- tests/utils/test_archive.py | 37 +++++++++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index c4c83e2f9c..ecd3069b23 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -48,10 +48,11 @@ _GOAL_STATE_PATTERN = re.compile(r"^(.*)/GoalState\.(\d+)\.xml$", re.IGNORECASE) +# Old names didn't have incarnation, new ones do. Ensure the regex captures both cases. # 2018-04-06T08:21:37.142697_incarnation_N # 2018-04-06T08:21:37.142697_incarnation_N.zip -_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+_incarnation_(\d+)$") -_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+_incarnation_(\d+)\.zip$") +_ARCHIVE_PATTERNS_DIRECTORY = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+(_incarnation_(\d+))?$$") +_ARCHIVE_PATTERNS_ZIP = re.compile(r"^\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d+(_incarnation_(\d+))?\.zip$") class StateFlusher(object): diff --git a/tests/utils/test_archive.py b/tests/utils/test_archive.py index f128e7da65..818a4ea4b6 100644 --- a/tests/utils/test_archive.py +++ b/tests/utils/test_archive.py @@ -1,16 +1,15 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the Apache License. -from datetime import datetime, timedelta - -import zipfile import os import shutil import tempfile +import zipfile +from datetime import datetime, timedelta import azurelinuxagent.common.logger as logger from azurelinuxagent.common.utils import fileutil from azurelinuxagent.common.utils.archive import StateFlusher, StateArchiver, _MAX_ARCHIVED_STATES -from tests.tools import AgentTestCase +from tests.tools import AgentTestCase, patch debug = False # pylint: disable=invalid-name if os.environ.get('DEBUG') == '1': @@ -50,7 +49,7 @@ def _parse_archive_name(name): # Name can be a directory or a zip # '0000-00-00T00:00:00.000000_incarnation_0' # '0000-00-00T00:00:00.000000_incarnation_0.zip' - timestamp_str, _, incarnation_ext = name.split("_") + timestamp_str, incarnation_ext = name.split("_incarnation_") incarnation_no_ext = os.path.splitext(incarnation_ext)[0] return timestamp_str, incarnation_no_ext @@ -173,6 +172,34 @@ def test_archive02(self): self.assertTrue(filename in archived_entries, "'{0}' is not in the list of unpurged entires".format(filename)) def test_archive03(self): + """ + All archives should be purged, both with the new naming (with incarnation number) and with the old naming. + """ + start = datetime.now() + timestamp1 = start + timedelta(seconds=5) + timestamp2 = start + timedelta(seconds=10) + + dir_old = timestamp1.isoformat() + dir_new = "{0}_incarnation_1".format(timestamp2.isoformat()) + + archive_old = "{0}.zip".format(timestamp1.isoformat()) + archive_new = "{0}_incarnation_1.zip".format(timestamp2.isoformat()) + + self._write_file(os.path.join("history", dir_old, "Prod.0.manifest.xml")) + self._write_file(os.path.join("history", dir_new, "Prod.1.manifest.xml")) + self._write_file(os.path.join("history", archive_old)) + self._write_file(os.path.join("history", archive_new)) + + self.assertEqual(4, len(os.listdir(self.history_dir)), "Not all entries were archived!") + + test_subject = StateArchiver(self.tmp_dir) + with patch("azurelinuxagent.common.utils.archive._MAX_ARCHIVED_STATES", 0): + test_subject.purge() + + archived_entries = os.listdir(self.history_dir) + self.assertEqual(0, len(archived_entries), "Not all entries were purged!") + + def test_archive04(self): """ The archive directory is created if it does not exist. From 21ebd557cab09af5f9a5a64f46e27cdfc12e050b Mon Sep 17 00:00:00 2001 From: Paula Gombar Date: Tue, 3 Nov 2020 13:52:15 -0800 Subject: [PATCH 8/8] don't purge if no goal state file exists --- azurelinuxagent/common/utils/archive.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azurelinuxagent/common/utils/archive.py b/azurelinuxagent/common/utils/archive.py index ecd3069b23..b87f337d5d 100644 --- a/azurelinuxagent/common/utils/archive.py +++ b/azurelinuxagent/common/utils/archive.py @@ -73,7 +73,10 @@ def flush(self): return archive_name = self._get_archive_name(files) - if archive_name and self._mkdir(archive_name): + if archive_name is None: + return + + if self._mkdir(archive_name): self._archive(files, archive_name) else: self._purge(files)