diff --git a/azurelinuxagent/ga/update.py b/azurelinuxagent/ga/update.py
index 293dd2313a..83db4be7a1 100644
--- a/azurelinuxagent/ga/update.py
+++ b/azurelinuxagent/ga/update.py
@@ -46,7 +46,7 @@
from azurelinuxagent.common.exception import ResourceGoneError, UpdateError, ExitException, AgentUpgradeExitException
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil, systemd
-from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses
+from azurelinuxagent.common.protocol.restapi import VMAgentUpdateStatus, VMAgentUpdateStatuses, ExtHandlerPackageList
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol
from azurelinuxagent.common.utils import shellutil
@@ -449,6 +449,15 @@ def _try_update_goal_state(self, protocol):
return False
return True
+ def __goal_state_updated(self, incarnation):
+ """
+ This function returns if the Goal State updated.
+ We currently rely on the incarnation number to determine that; i.e. if it changed from the last processed GS
+ """
+ # TODO: This check should be based on the ExtensionsGoalState.id property
+ # (this property abstracts incarnation/etag logic based on the delivery pipeline of the Goal State)
+ return incarnation != self.last_incarnation
+
def _process_goal_state(self, exthandlers_handler, remote_access_handler):
protocol = exthandlers_handler.protocol
if not self._try_update_goal_state(protocol):
@@ -479,7 +488,7 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler):
incarnation = protocol.get_incarnation()
try:
- if incarnation != self.last_incarnation: # TODO: This check should be based in the etag for the extensions goal state
+ if self.__goal_state_updated(incarnation):
if not self._extensions_summary.converged:
message = "A new goal state was received, but not all the extensions in the previous goal state have completed: {0}".format(self._extensions_summary)
logger.warn(message)
@@ -491,9 +500,9 @@ def _process_goal_state(self, exthandlers_handler, remote_access_handler):
# report status always, even if the goal state did not change
# do it before processing the remote access, since that operation can take a long time
- self._report_status(exthandlers_handler, incarnation_changed=incarnation != self.last_incarnation)
+ self._report_status(exthandlers_handler, incarnation_changed=self.__goal_state_updated(incarnation))
- if incarnation != self.last_incarnation:
+ if self.__goal_state_updated(incarnation):
remote_access_handler.run()
finally:
self.last_incarnation = incarnation
@@ -903,7 +912,9 @@ def _shutdown(self):
def _check_and_download_agent_if_upgrade_available(self, protocol, base_version=CURRENT_VERSION):
"""
- This function periodically (1hr by default) checks if new Agent upgrade is available and downloads it on filesystem if it is.
+ This function downloads the new agent if an update is available.
+ If a requested version is available in goal state, then only that version is downloaded (new-update model)
+ Else, we periodically (1hr by default) checks if new Agent upgrade is available and download it on filesystem if available (old-update model)
rtype: Boolean
return: True if current agent is no longer available or an agent with a higher version number is available
else False
@@ -912,40 +923,103 @@ def _check_and_download_agent_if_upgrade_available(self, protocol, base_version=
if not conf.get_autoupdate_enabled():
return False
- now = time.time()
- if self.last_attempt_time is not None:
- next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency()
- else:
- next_attempt_time = now
- if next_attempt_time > now:
- return False
+ def report_error(msg_, version=CURRENT_VERSION):
+ logger.warn(msg_)
+ add_event(AGENT_NAME, op=WALAEventOperation.Download, version=version, is_success=False, message=msg_)
family = conf.get_autoupdate_gafamily()
- logger.info("Checking for agent updates (family: {0})", family)
-
- self.last_attempt_time = now
-
+ incarnation_changed = False
try:
- manifest_list, etag = protocol.get_vmagent_manifests()
-
+ # Fetch the agent manifests from the latest Goal State
+ manifest_list, incarnation = protocol.get_vmagent_manifests()
+ incarnation_changed = self.__goal_state_updated(incarnation)
manifests = [m for m in manifest_list if m.family == family and len(m.uris) > 0]
if len(manifests) == 0:
- logger.verbose(u"Incarnation {0} has no {1} agent updates",
- etag, family)
+ logger.verbose(
+ u"No manifest links found for agent family: {0} for incarnation: {1}, skipping update check".format(
+ family, incarnation))
return False
+ except Exception as err:
+ # If there's some issues in fetching the agent manifests, report it only on incarnation change
+ if incarnation_changed:
+ msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(err))
+ report_error(msg)
+ return False
- pkg_list = protocol.get_vmagent_pkgs(manifests[0])
+ requested_version = None
+ if conf.get_enable_ga_versioning() and manifests[0].is_requested_version_specified:
+ # If GA versioning is enabled and requested version present in GS, and it's a new GS, follow new logic
+ if incarnation_changed:
+ # With the new model, we will get a new GS when CRP wants us to auto-update using required version.
+ # If there's no new incarnation, don't proceed with anything
+ requested_version = manifests[0].requested_version
+ msg = "Found requested version in manifest: {0} for incarnation: {1}".format(
+ requested_version, incarnation)
+ logger.info(msg)
+ add_event(AGENT_NAME, op=WALAEventOperation.AgentUpgrade, is_success=True, message=msg)
+ else:
+ # If incarnation didn't change, don't process anything.
+ return False
+ else:
+ # If no requested version specified in the Goal State, follow the old auto-update logic
+ # Note: If the first Goal State contains a requested version, this timer won't start (i.e. self.last_attempt_time won't be updated).
+ # If any subsequent goal state does not contain requested version, this timer will start then, and we will
+ # download all versions available in PIR and auto-update to the highest available version on that goal state.
+ now = time.time()
+ if self.last_attempt_time is not None:
+ next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency()
+ else:
+ next_attempt_time = now
+ if next_attempt_time > now:
+ return False
- # Set the agents to those available for download at least as
- # current as the existing agent and remove from disk any agent
- # no longer reported to the VM.
- # Note:
- # The code leaves on disk available, but blacklisted, agents
- # so as to preserve the state. Otherwise, those agents could be
- # again downloaded and inappropriately retried.
+ logger.info("No requested version specified, checking for all versions for agent update (family: {0})",
+ family)
+
+ self.last_attempt_time = now
+
+ try:
+ # If we make it to this point, then either there is a requested version in a new GS (new auto-update model),
+ # or the 1hr time limit has elapsed for us to check the agent manifest for updates (old auto-update model).
+ pkg_list = ExtHandlerPackageList()
+
+ # If the requested version is the current version, don't download anything;
+ # the call to purge() below will delete all other agents from disk
+ # In this case, no need to even fetch the GA family manifest as we don't need to download any agent.
+ if requested_version is not None and requested_version == CURRENT_VERSION:
+ packages_to_download = []
+ logger.info("The requested version is running as the current version: {0}".format(requested_version))
+ else:
+ pkg_list = protocol.get_vmagent_pkgs(manifests[0])
+ packages_to_download = pkg_list.versions
+
+ # Verify the requested version is in GA family manifest (if specified)
+ if requested_version is not None and requested_version != CURRENT_VERSION:
+ package_found = False
+ for pkg in pkg_list.versions:
+ if FlexibleVersion(pkg.version) == requested_version:
+ # Found a matching package, only download that one
+ packages_to_download = [pkg]
+ package_found = True
+ break
+
+ if not package_found:
+ msg = "No matching package found in the agent manifest for requested version: {0} in incarnation: {1}, skipping agent update".format(
+ requested_version, incarnation)
+ report_error(msg, version=requested_version)
+ return False
+
+ # Set the agents to those available for download at least as current as the existing agent
+ # or to the requested version (if specified)
host = self._get_host_plugin(protocol=protocol)
- self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions])
+ self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in packages_to_download])
+ # Remove from disk any agent no longer needed in the VM.
+ # If requested version is provided, this would delete all other agents present on the VM except the
+ # current one and the requested one if they're different, and only the current one if same.
+ # Note:
+ # The code leaves on disk available, but blacklisted, agents to preserve the state.
+ # Otherwise, those agents could be downloaded again and inappropriately retried.
self._purge_agents()
self._filter_blacklisted_agents()
@@ -954,10 +1028,9 @@ def _check_and_download_agent_if_upgrade_available(self, protocol, base_version=
return not self._is_version_eligible(base_version) \
or (len(self.agents) > 0 and self.agents[0].version > base_version)
- except Exception as e: # pylint: disable=W0612
- msg = u"Exception retrieving agent manifests: {0}".format(textutil.format_exception(e))
- add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False,
- message=msg)
+ except Exception as err:
+ msg = u"Exception downloading agents for update: {0}".format(textutil.format_exception(err))
+ report_error(msg)
return False
def _write_pid_file(self):
diff --git a/tests/data/wire/ext_conf_missing_requested_version.xml b/tests/data/wire/ext_conf_missing_requested_version.xml
new file mode 100644
index 0000000000..e68fcaf995
--- /dev/null
+++ b/tests/data/wire/ext_conf_missing_requested_version.xml
@@ -0,0 +1,38 @@
+
+
+
+
+ Prod
+ 5.2.1.0
+
+ http://mock-goal-state/manifest_of_ga.xml
+
+
+
+ Test
+ 5.2.1.0
+
+ http://mock-goal-state/manifest_of_ga.xml
+
+
+
+
+
+
+
+
+
+
+ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]}
+
+
+
+
+ https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo
+
+
+
diff --git a/tests/data/wire/ga_manifest.xml b/tests/data/wire/ga_manifest.xml
index 48a1f4f94c..e12f054916 100644
--- a/tests/data/wire/ga_manifest.xml
+++ b/tests/data/wire/ga_manifest.xml
@@ -25,6 +25,12 @@
2.1.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__2.1.0
+
+ 9.9.9.10
+
+ http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0
+
+
99999.0.0.0http://mock-goal-state/ga-manifests/OSTCExtensions.WALinuxAgent__99999.0.0.0
diff --git a/tests/ga/test_update.py b/tests/ga/test_update.py
index 9ab3f4bfde..e40c05eaf3 100644
--- a/tests/ga/test_update.py
+++ b/tests/ga/test_update.py
@@ -1918,7 +1918,8 @@ def create_conf_mocks(self, hotfix_frequency, normal_frequency):
return_value=hotfix_frequency):
with patch("azurelinuxagent.common.conf.get_normal_upgrade_frequency",
return_value=normal_frequency):
- yield
+ with patch("azurelinuxagent.common.conf.get_autoupdate_gafamily", return_value="Prod"):
+ yield
@contextlib.contextmanager
def __get_update_handler(self, iterations=1, test_data=None, hotfix_frequency=1.0, normal_frequency=2.0,
@@ -1934,12 +1935,14 @@ def get_handler(url, **kwargs):
if HttpRequestPredicates.is_agent_package_request(url):
agent_pkg = load_bin_data(self._get_agent_file_name(), self._agent_zip_dir)
+ protocol.mock_wire_data.call_counts['agentArtifact'] += 1
return ResponseMock(response=agent_pkg)
return protocol.mock_wire_data.mock_http_get(url, **kwargs)
protocol.set_http_handlers(http_get_handler=get_handler)
with self.create_conf_mocks(hotfix_frequency, normal_frequency):
with patch("azurelinuxagent.ga.update.add_event") as mock_telemetry:
+ update_handler._protocol = protocol
yield update_handler, mock_telemetry
def __assert_exit_code_successful(self, exit_mock):
@@ -1958,6 +1961,13 @@ def __assert_agent_directories_available(self, versions):
for version in versions:
self.assertTrue(os.path.exists(self.agent_dir(version)), "Agent directory {0} not found".format(version))
+ def __assert_agent_directories_exist_and_others_dont_exist(self, versions):
+ self.__assert_agent_directories_available(versions=versions)
+ other_agents = [agent_dir for agent_dir in self.agent_dirs() if
+ agent_dir not in [self.agent_dir(version) for version in versions]]
+ self.assertFalse(any(other_agents),
+ "All other agents should be purged from agent dir: {0}".format(other_agents))
+
def __assert_no_agent_upgrade_telemetry(self, mock_telemetry):
self.assertEqual(0, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"Agent upgrade discovered, updating to" in kwarg['message'] and kwarg[
@@ -2073,6 +2083,168 @@ def reload_conf(url, mock_wire_data):
'Discovered new {0} upgrade WALinuxAgent-99999.0.0.0; Will upgrade on or after'.format(
AgentUpgradeType.Normal) in msg]), 0, "Error message not propagated properly")
+ def test_it_should_download_only_requested_version_if_available(self):
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+ with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler.run(debug=True)
+
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
+ 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[
+ 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade]
+ self.assertEqual(1, len(upgrade_event_msgs),
+ "Did not find the event indicating that the agent was upgraded. Got: {0}".format(
+ mock_telemetry.call_args_list))
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10"])
+
+ def test_it_should_cleanup_all_agents_except_requested_version_and_current_version(self):
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+
+ # Set the test environment by adding 20 random agents to the agent directory
+ self.prepare_agents()
+ self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
+
+ with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler.run(debug=True)
+
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ upgrade_event_msgs = [kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
+ 'Agent upgrade discovered, updating to WALinuxAgent-9.9.9.10 -- exiting' in kwarg[
+ 'message'] and kwarg['op'] == WALAEventOperation.AgentUpgrade]
+ self.assertEqual(1, len(upgrade_event_msgs), "Agent not upgraded properly")
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=["9.9.9.10", str(CURRENT_VERSION)])
+
+ def test_it_should_not_update_if_requested_version_not_found_in_manifest(self):
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_missing_requested_version.xml"
+ with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler.run(debug=True)
+
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ self.__assert_no_agent_upgrade_telemetry(mock_telemetry)
+ agent_msgs = [kwarg for _, kwarg in mock_telemetry.call_args_list if
+ kwarg['op'] in (WALAEventOperation.AgentUpgrade, WALAEventOperation.Download)]
+ # This will throw if corresponding message not found so not asserting on that
+ requested_version_found = next(kwarg for kwarg in agent_msgs if
+ "Found requested version in manifest: 5.2.1.0 for incarnation: 1" in kwarg['message'])
+ self.assertTrue(requested_version_found['is_success'],
+ "The requested version found op should be reported as a success")
+
+ skipping_update = next(kwarg for kwarg in agent_msgs if
+ "No matching package found in the agent manifest for requested version: 5.2.1.0 in incarnation: 1, skipping agent update" in kwarg['message'])
+ self.assertEqual(skipping_update['version'], FlexibleVersion("5.2.1.0"),
+ "The not found message should be reported from requested agent version")
+ self.assertFalse(skipping_update['is_success'], "The not found op should be reported as a failure")
+
+ def test_it_should_only_try_downloading_requested_version_on_new_incarnation(self):
+ no_of_iterations = 1000
+
+ # Set the test environment by adding 20 random agents to the agent directory
+ self.prepare_agents()
+ self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
+
+ def reload_conf(url, mock_wire_data):
+ # This function reloads the conf mid-run to mimic an actual customer scenario
+
+ if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[
+ "goalstate"] >= 10 and mock_wire_data.call_counts["goalstate"] < 15:
+
+ # Ensure we didn't try to download any agents except during the incarnation change
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)])
+
+ # Update the requested version to "99999.0.0.0"
+ update_handler._protocol.mock_wire_data.set_extension_config_requested_version("99999.0.0.0")
+ reload_conf.call_count += 1
+ self._add_write_permission_to_goal_state_files()
+ reload_conf.incarnation += 1
+ mock_wire_data.set_incarnation(reload_conf.incarnation)
+
+ reload_conf.call_count = 0
+ reload_conf.incarnation = 2
+
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+ with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf,
+ normal_frequency=0.01, hotfix_frequency=0.01) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION))
+ update_handler._protocol.mock_wire_data.set_incarnation(2)
+ update_handler.run(debug=True)
+
+ self.assertGreaterEqual(reload_conf.call_count, 1, "Reload conf not updated as expected")
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ self.__assert_upgrade_telemetry_emitted(mock_telemetry)
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=["99999.0.0.0", str(CURRENT_VERSION)])
+ self.assertEqual(update_handler._protocol.mock_wire_data.call_counts['agentArtifact'], 1,
+ "only 1 agent should've been downloaded - 1 per incarnation")
+ self.assertEqual(update_handler._protocol.mock_wire_data.call_counts["manifest_of_ga.xml"], 1,
+ "only 1 agent manifest call should've been made - 1 per incarnation")
+
+ def test_it_should_fallback_to_old_update_logic_if_requested_version_not_available(self):
+ no_of_iterations = 100
+
+ # Set the test environment by adding 20 random agents to the agent directory
+ self.prepare_agents()
+ self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
+
+ def reload_conf(url, mock_wire_data):
+ # This function reloads the conf mid-run to mimic an actual customer scenario
+ if HttpRequestPredicates.is_goal_state_request(url) and mock_wire_data.call_counts[
+ "goalstate"] >= 5:
+ reload_conf.call_count += 1
+
+ # By this point, the GS with requested version should've been executed. Verify that
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)])
+
+ # Update the ext-conf and incarnation and remove requested versions from GS,
+ # this should download all versions requested in config
+ mock_wire_data.data_files["ext_conf"] = "wire/ext_conf.xml"
+ mock_wire_data.reload()
+ self._add_write_permission_to_goal_state_files()
+ reload_conf.incarnation += 1
+ mock_wire_data.set_incarnation(reload_conf.incarnation)
+
+ reload_conf.call_count = 0
+ reload_conf.incarnation = 2
+
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+ with self.__get_update_handler(iterations=no_of_iterations, test_data=data_file, reload_conf=reload_conf,
+ normal_frequency=0.001) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION))
+ update_handler._protocol.mock_wire_data.set_incarnation(2)
+ update_handler.run(debug=True)
+
+ self.assertGreater(reload_conf.call_count, 0, "Reload conf not updated")
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ self.__assert_upgrade_telemetry_emitted(mock_telemetry)
+ self.__assert_agent_directories_exist_and_others_dont_exist(
+ versions=["1.0.0", "1.1.0", "1.2.0", "2.0.0", "2.1.0", "9.9.9.10", "99999.0.0.0", str(CURRENT_VERSION)])
+
+ def test_it_should_not_download_anything_if_requested_version_is_current_version_and_delete_all_agents(self):
+ data_file = mockwiredata.DATA_FILE.copy()
+ data_file["ext_conf"] = "wire/ext_conf_requested_version.xml"
+
+ # Set the test environment by adding 20 random agents to the agent directory
+ self.prepare_agents()
+ self.assertEqual(20, self.agent_count(), "Agent directories not set properly")
+
+ with self.__get_update_handler(test_data=data_file) as (update_handler, mock_telemetry):
+ with patch.object(conf, "get_enable_ga_versioning", return_value=True):
+ update_handler._protocol.mock_wire_data.set_extension_config_requested_version(str(CURRENT_VERSION))
+ update_handler._protocol.mock_wire_data.set_incarnation(2)
+ update_handler.run(debug=True)
+
+ self.__assert_exit_code_successful(update_handler.exit_mock)
+ self.__assert_no_agent_upgrade_telemetry(mock_telemetry)
+ self.__assert_agent_directories_exist_and_others_dont_exist(versions=[str(CURRENT_VERSION)])
+
@patch('azurelinuxagent.ga.update.get_collect_telemetry_events_handler')
@patch('azurelinuxagent.ga.update.get_send_telemetry_events_handler')
diff --git a/tests/protocol/mockwiredata.py b/tests/protocol/mockwiredata.py
index b49f91569b..d7d6a09391 100644
--- a/tests/protocol/mockwiredata.py
+++ b/tests/protocol/mockwiredata.py
@@ -140,6 +140,7 @@ def __init__(self, data_files=None):
"extensionsconfiguri": 0,
"remoteaccessinfouri": 0,
"extensionArtifact": 0,
+ "agentArtifact": 0,
"manifest.xml": 0,
"manifest_of_ga.xml": 0,
"ExampleHandlerLinux": 0,