From cfafed437da7d3ebe2cb203f3507deae58df11c4 Mon Sep 17 00:00:00 2001 From: Nageswara Nandigam <84482346+nagworld9@users.noreply.github.com> Date: Thu, 9 Mar 2023 15:30:18 -0800 Subject: [PATCH] logging manifest type and don't log same error until next period in agent update. (#2778) * improve logging and don't log same error until next period * address comments * update comment * update comment --- azurelinuxagent/common/protocol/goal_state.py | 2 +- azurelinuxagent/common/protocol/wire.py | 4 +-- azurelinuxagent/ga/agent_update.py | 27 +++++++++++++------ tests/ga/test_agent_update.py | 26 ++++++++++++++++++ tests/ga/test_extension.py | 2 +- tests/protocol/test_wire.py | 8 +++--- 6 files changed, 53 insertions(+), 16 deletions(-) diff --git a/azurelinuxagent/common/protocol/goal_state.py b/azurelinuxagent/common/protocol/goal_state.py index 0980ca9d02..3d02268ced 100644 --- a/azurelinuxagent/common/protocol/goal_state.py +++ b/azurelinuxagent/common/protocol/goal_state.py @@ -185,7 +185,7 @@ def fetch_extension_manifest(self, extension_name, uris): def _fetch_manifest(self, manifest_type, name, uris): try: is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack - xml_text = self._wire_client.fetch_manifest(uris, use_verify_header=is_fast_track) + xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track) self._history.save_manifest(name, xml_text) return ExtensionManifest(xml_text) except Exception as e: diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 38a3e0621d..ea397f0497 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -582,8 +582,8 @@ def call_storage_service(http_req, *args, **kwargs): def fetch_artifacts_profile_blob(self, uri): return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple - def fetch_manifest(self, uris, use_verify_header): - uri, content = self._fetch_content("manifest", uris, use_verify_header=use_verify_header) + def fetch_manifest(self, manifest_type, uris, use_verify_header): + uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, use_verify_header=use_verify_header) self.get_host_plugin().update_manifest_uri(uri) return content diff --git a/azurelinuxagent/ga/agent_update.py b/azurelinuxagent/ga/agent_update.py index 206a628eaa..3728e57ed8 100644 --- a/azurelinuxagent/ga/agent_update.py +++ b/azurelinuxagent/ga/agent_update.py @@ -29,6 +29,8 @@ def __init__(self, protocol): self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id self._last_attempted_update_time = datetime.datetime.min self._last_attempted_update_version = FlexibleVersion("0.0.0.0") + self._last_warning = "" + self._last_warning_time = datetime.datetime.min def __should_update_agent(self, requested_version): """ @@ -170,15 +172,24 @@ def __get_all_agents_on_disk(): path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME)) return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)] - @staticmethod - def __log_event(level, msg_, success_=True): - if level == LogLevel.WARNING: - logger.warn(msg_) - elif level == LogLevel.ERROR: - logger.error(msg_) - elif level == LogLevel.INFO: + def __log_event(self, level, msg_, success_=True): + if level == LogLevel.INFO: logger.info(msg_) - add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + else: + msg_ += "[NOTE: Will not log the same error for the next 6 hours]" + # Incarnation may change if we get new goal state that would make whole string unique every time. So comparing only the substring until Incarnation if Incarnation included in msg + # Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update" + prefix_msg = msg_.split("incarnation", 1)[0] + prefix_last_warning_msg = self._last_warning.split("incarnation", 1)[0] + if prefix_msg != prefix_last_warning_msg or self._last_warning_time == datetime.datetime.min or datetime.datetime.now() >= self._last_warning_time + datetime.timedelta(hours=6): + if level == LogLevel.WARNING: + logger.warn(msg_) + elif level == LogLevel.ERROR: + logger.error(msg_) + add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False) + self._last_warning_time = datetime.datetime.now() + self._last_warning = msg_ def run(self, goal_state): try: diff --git a/tests/ga/test_agent_update.py b/tests/ga/test_agent_update.py index f484a1dae0..73339d7c3c 100644 --- a/tests/ga/test_agent_update.py +++ b/tests/ga/test_agent_update.py @@ -309,3 +309,29 @@ def get_handler(url, **kwargs): self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status) self.assertEqual(1, vm_agent_update_status.code) self.assertIn("Missing requested version", vm_agent_update_status.message) + + def test_it_should_not_log_same_error_next_hours(self): + data_file = DATA_FILE.copy() + data_file["ext_conf"] = "wire/ext_conf_missing_family.xml" + + # Set the test environment by adding 20 random agents to the agent directory + self.prepare_agents() + self.assertEqual(20, self.agent_count(), "Agent directories not set properly") + + with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry): + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")), + "New agent directory should not be found") + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") + + agent_update_handler.run(agent_update_handler._protocol.get_goal_state()) + + self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if + "No manifest links found for agent family" in kwarg[ + 'message'] and kwarg[ + 'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS") \ No newline at end of file diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 10f442749d..5309b80566 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -3185,7 +3185,7 @@ def manifest_location_handler(url, **kwargs): wire._DOWNLOAD_TIMEOUT = datetime.timedelta(minutes=0) try: with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, use_verify_header=False) + protocol.client.fetch_manifest("extension", ext_handlers[0].manifest_uris, use_verify_header=False) finally: wire._DOWNLOAD_TIMEOUT = download_timeout diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index ebe925bc33..cc73b0fb45 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -684,7 +684,7 @@ def http_get_handler(url, *_, **__): with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol: HostPluginProtocol.is_default_channel = False - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -707,7 +707,7 @@ def http_get_handler(url, *_, **kwargs): HostPluginProtocol.is_default_channel = False try: - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded') @@ -744,7 +744,7 @@ def http_get_handler(url, *_, **kwargs): protocol.client.get_host_plugin() protocol.set_http_handlers(http_get_handler=http_get_handler) - manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(manifest, manifest_xml) @@ -778,7 +778,7 @@ def http_get_handler(url, *_, **kwargs): protocol.set_http_handlers(http_get_handler=http_get_handler) with self.assertRaises(ExtensionDownloadError): - protocol.client.fetch_manifest([manifest_url], use_verify_header=False) + protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False) urls = protocol.get_tracked_urls() self.assertEqual(len(urls), 4, "Unexpected number of HTTP requests: [{0}]".format(urls))