Skip to content

Commit

Permalink
logging manifest type and don't log same error until next period in a…
Browse files Browse the repository at this point in the history
…gent update. (Azure#2778)

* improve logging and don't log same error until next period

* address comments

* update comment

* update comment
  • Loading branch information
nagworld9 committed Apr 17, 2023
1 parent eb23558 commit 91197cf
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 17 deletions.
2 changes: 1 addition & 1 deletion azurelinuxagent/common/protocol/goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def fetch_extension_manifest(self, extension_name, uris):
def _fetch_manifest(self, manifest_type, name, uris):
try:
is_fast_track = self.extensions_goal_state.source == GoalStateSource.FastTrack
xml_text = self._wire_client.fetch_manifest(uris, use_verify_header=is_fast_track)
xml_text = self._wire_client.fetch_manifest(manifest_type, uris, use_verify_header=is_fast_track)
self._history.save_manifest(name, xml_text)
return ExtensionManifest(xml_text)
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,8 @@ def call_storage_service(http_req, *args, **kwargs):
def fetch_artifacts_profile_blob(self, uri):
return self._fetch_content("artifacts profile blob", [uri], use_verify_header=False)[1] # _fetch_content returns a (uri, content) tuple

def fetch_manifest(self, uris, use_verify_header):
uri, content = self._fetch_content("manifest", uris, use_verify_header=use_verify_header)
def fetch_manifest(self, manifest_type, uris, use_verify_header):
uri, content = self._fetch_content("{0} manifest".format(manifest_type), uris, use_verify_header=use_verify_header)
self.get_host_plugin().update_manifest_uri(uri)
return content

Expand Down
27 changes: 19 additions & 8 deletions azurelinuxagent/ga/agent_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def __init__(self, protocol):
self._gs_id = self._protocol.get_goal_state().extensions_goal_state.id
self._last_attempted_update_time = datetime.datetime.min
self._last_attempted_update_version = FlexibleVersion("0.0.0.0")
self._last_warning = ""
self._last_warning_time = datetime.datetime.min

def __should_update_agent(self, requested_version):
"""
Expand Down Expand Up @@ -170,15 +172,24 @@ def __get_all_agents_on_disk():
path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
return [GuestAgent.from_installed_agent(path=agent_dir) for agent_dir in glob.iglob(path) if os.path.isdir(agent_dir)]

@staticmethod
def __log_event(level, msg_, success_=True):
if level == LogLevel.WARNING:
logger.warn(msg_)
elif level == LogLevel.ERROR:
logger.error(msg_)
elif level == LogLevel.INFO:
def __log_event(self, level, msg_, success_=True):
if level == LogLevel.INFO:
logger.info(msg_)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
else:
msg_ += "[NOTE: Will not log the same error for the next 6 hours]"
# Incarnation may change if we get new goal state that would make whole string unique every time. So comparing only the substring until Incarnation if Incarnation included in msg
# Example msg "Unable to update Agent: No manifest links found for agent family: Prod for incarnation: incarnation_1, skipping agent update"
prefix_msg = msg_.split("incarnation", 1)[0]
prefix_last_warning_msg = self._last_warning.split("incarnation", 1)[0]
if prefix_msg != prefix_last_warning_msg or self._last_warning_time == datetime.datetime.min or datetime.datetime.now() >= self._last_warning_time + datetime.timedelta(hours=6):
if level == LogLevel.WARNING:
logger.warn(msg_)
elif level == LogLevel.ERROR:
logger.error(msg_)
add_event(op=WALAEventOperation.AgentUpgrade, is_success=success_, message=msg_, log_event=False)
self._last_warning_time = datetime.datetime.now()
self._last_warning = msg_

def run(self, goal_state):
try:
Expand Down
26 changes: 26 additions & 0 deletions tests/ga/test_agent_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,29 @@ def get_handler(url, **kwargs):
self.assertEqual(VMAgentUpdateStatuses.Error, vm_agent_update_status.status)
self.assertEqual(1, vm_agent_update_status.code)
self.assertIn("Missing requested version", vm_agent_update_status.message)

def test_it_should_not_log_same_error_next_hours(self):
data_file = DATA_FILE.copy()
data_file["ext_conf"] = "wire/ext_conf_missing_family.xml"

# Set the test environment by adding 20 random agents to the agent directory
self.prepare_agents()
self.assertEqual(20, self.agent_count(), "Agent directories not set properly")

with self.__get_agent_update_handler(test_data=data_file) as (agent_update_handler, mock_telemetry):
agent_update_handler.run(agent_update_handler._protocol.get_goal_state())

self.assertFalse(os.path.exists(self.agent_dir("99999.0.0.0")),
"New agent directory should not be found")

self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"No manifest links found for agent family" in kwarg[
'message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS")

agent_update_handler.run(agent_update_handler._protocol.get_goal_state())

self.assertEqual(1, len([kwarg['message'] for _, kwarg in mock_telemetry.call_args_list if
"No manifest links found for agent family" in kwarg[
'message'] and kwarg[
'op'] == WALAEventOperation.AgentUpgrade]), "Agent manifest should not be in GS")
2 changes: 1 addition & 1 deletion tests/ga/test_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -3185,7 +3185,7 @@ def manifest_location_handler(url, **kwargs):
wire._DOWNLOAD_TIMEOUT = datetime.timedelta(minutes=0)
try:
with self.assertRaises(ExtensionDownloadError):
protocol.client.fetch_manifest(ext_handlers[0].manifest_uris, use_verify_header=False)
protocol.client.fetch_manifest("extension", ext_handlers[0].manifest_uris, use_verify_header=False)
finally:
wire._DOWNLOAD_TIMEOUT = download_timeout

Expand Down
8 changes: 4 additions & 4 deletions tests/protocol/test_wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def http_get_handler(url, *_, **__):
with mock_wire_protocol(mockwiredata.DATA_FILE, http_get_handler=http_get_handler) as protocol:
HostPluginProtocol.is_default_channel = False

manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded')
Expand All @@ -707,7 +707,7 @@ def http_get_handler(url, *_, **kwargs):
HostPluginProtocol.is_default_channel = False

try:
manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml, 'The expected manifest was not downloaded')
Expand Down Expand Up @@ -744,7 +744,7 @@ def http_get_handler(url, *_, **kwargs):
protocol.client.get_host_plugin()

protocol.set_http_handlers(http_get_handler=http_get_handler)
manifest = protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
manifest = protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(manifest, manifest_xml)
Expand Down Expand Up @@ -778,7 +778,7 @@ def http_get_handler(url, *_, **kwargs):
protocol.set_http_handlers(http_get_handler=http_get_handler)

with self.assertRaises(ExtensionDownloadError):
protocol.client.fetch_manifest([manifest_url], use_verify_header=False)
protocol.client.fetch_manifest("test", [manifest_url], use_verify_header=False)

urls = protocol.get_tracked_urls()
self.assertEqual(len(urls), 4, "Unexpected number of HTTP requests: [{0}]".format(urls))
Expand Down
2 changes: 1 addition & 1 deletion tests_e2e/pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parameters:
- name: test_suites
displayName: Test Suites
type: string
default: agent_bvt
default: agent_bvt,agent_update

# NOTES:
# * 'image', 'location' and 'vm_size' override any values in the test suites/images definition
Expand Down

0 comments on commit 91197cf

Please sign in to comment.