Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Helper to handle exception message #2305

Merged
merged 11 commits into from
Jul 19, 2021
7 changes: 3 additions & 4 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import subprocess
import sys
import threading
import traceback
from azurelinuxagent.common import cgroupconfigurator, logcollector
from azurelinuxagent.common.cgroupapi import SystemdCgroupsApi

Expand All @@ -38,7 +37,7 @@
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.logcollector import LogCollector, OUTPUT_RESULTS_FILE_PATH
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.utils import fileutil, textutil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.utils.networkutil import AddFirewallRules
from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_VERSION, AGENT_VERSION, \
Expand Down Expand Up @@ -298,10 +297,10 @@ def main(args=None):
agent.collect_logs(log_collector_full_mode)
elif command == AgentCommands.SetupFirewall:
agent.setup_firewall(firewall_metadata)
except Exception:
except Exception as e:
logger.error(u"Failed to run '{0}': {1}",
command,
traceback.format_exc())
textutil.format_exception(e))


def parse_args(sys_args):
Expand Down
1 change: 1 addition & 0 deletions azurelinuxagent/common/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ def add_event(self, name, op=WALAEventOperation.Unknown, is_success=True, durati
self.add_common_event_parameters(event, datetime.utcnow())

data = get_properties(event)

try:
self.save_event(json.dumps(data))
except EventError as e:
Expand Down
7 changes: 3 additions & 4 deletions azurelinuxagent/common/persist_firewall_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@
#
import os
import sys
import traceback

import azurelinuxagent.common.conf as conf
from azurelinuxagent.common import logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil, systemd
from azurelinuxagent.common.utils import shellutil, fileutil
from azurelinuxagent.common.utils import shellutil, fileutil, textutil
from azurelinuxagent.common.utils.networkutil import AddFirewallRules
from azurelinuxagent.common.utils.shellutil import CommandError

Expand Down Expand Up @@ -275,9 +274,9 @@ def __log_network_setup_service_logs(self):
msg = "Unable to fetch service logs, Command: {0} failed with ExitCode: {1}\nStdout: {2}\nStderr: {3}".format(
' '.join(cmd), error.returncode, error.stdout, error.stderr)
logger.warn(msg)
except Exception:
except Exception as e:
msg = "Ran into unexpected error when getting logs for {0} service. Error: {1}".format(
self._network_setup_service_name, traceback.format_exc())
self._network_setup_service_name, textutil.format_exception(e))
logger.warn(msg)

# Log service status and logs if we can fetch them from journalctl and send it to Kusto,
Expand Down
5 changes: 2 additions & 3 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import os
import random
import time
import traceback
import xml.sax.saxutils as saxutils
from collections import defaultdict
from datetime import datetime, timedelta
Expand Down Expand Up @@ -808,7 +807,7 @@ def try_update_goal_state(self):
self._last_try_update_goal_state_failed = True
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(e))
add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=False, message=message, log_event=False)
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(traceback.format_exc()))
message = u"An error occurred while retrieving the goal state: {0}".format(textutil.format_exception(e))
logger.warn(message)
message = u"Attempts to retrieve the goal state are failing: {0}".format(ustr(e))
logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(message))
Expand Down Expand Up @@ -1207,7 +1206,7 @@ def _send_event(provider_id, debug_info):
events_per_provider[event.providerId] += 1

except Exception as error:
logger.warn("Unexpected error when generating Events: {0}, {1}", ustr(error), traceback.format_exc())
logger.warn("Unexpected error when generating Events:{0}", textutil.format_exception(error))

# Send out all events left in buffer.
for provider_id in list(buf.keys()):
Expand Down
5 changes: 2 additions & 3 deletions azurelinuxagent/common/utils/restutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import re
import threading
import time
import traceback
import socket
import struct

Expand Down Expand Up @@ -626,6 +625,6 @@ def read_response_error(resp):

result = textutil.replace_non_ascii(result)

except Exception:
logger.warn(traceback.format_exc())
except Exception as e:
logger.warn(textutil.format_exception(e))
return result
19 changes: 19 additions & 0 deletions azurelinuxagent/common/utils/textutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import string
import struct
import sys
import traceback
import xml.dom.minidom as minidom
import zlib

Expand Down Expand Up @@ -430,3 +431,21 @@ def str_to_encoded_ustr(s, encoding='utf-8'):
return ustr(s)
# For Py2, explicitly convert the string to unicode with the specified encoding
return ustr(s, encoding=encoding)


def format_exception(exception):
# Function to format exception message
e = None
if sys.version_info[0] == 2:
_, e, tb = sys.exc_info()
else:
tb = exception.__traceback__

msg = ustr(exception) + "\n"
if tb is None or (sys.version_info[0] == 2 and e != exception):
msg += "[Traceback not available]"
else:
msg += ''.join(traceback.format_exception(etype=type(exception), value=exception, tb=tb))

return msg

4 changes: 2 additions & 2 deletions azurelinuxagent/daemon/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import os
import sys
import time
import traceback

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
Expand All @@ -32,6 +31,7 @@
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.wire import WireProtocol
from azurelinuxagent.common.rdma import setup_rdma_device
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_NAME, \
AGENT_VERSION, \
DISTRO_NAME, DISTRO_VERSION, PY_VERSION_MAJOR, PY_VERSION_MINOR, \
Expand Down Expand Up @@ -82,7 +82,7 @@ def run(self, child_args=None):
try:
self.daemon(child_args)
except Exception as e: # pylint: disable=W0612
err_msg = traceback.format_exc()
err_msg = textutil.format_exception(e)
add_event(name=AGENT_NAME, is_success=False, message=ustr(err_msg),
op=WALAEventOperation.UnhandledError)
logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
Expand Down
10 changes: 5 additions & 5 deletions azurelinuxagent/ga/collect_telemetry_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import os
import re
import threading
import traceback
from collections import defaultdict

import azurelinuxagent.common.logger as logger
Expand All @@ -35,6 +34,7 @@
from azurelinuxagent.common.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam, \
GuestAgentGenericLogsSchema, GuestAgentExtensionEventsSchema
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.ga.exthandlers import HANDLER_NAME_PATTERN
from azurelinuxagent.ga.periodic_operation import PeriodicOperation

Expand Down Expand Up @@ -116,8 +116,8 @@ def _operation(self):
# the telemetry service comes back up
delete_all_event_files = False
except Exception as error:
msg = "Unknown error occurred when trying to collect extension events. Error: {0}, Stack: {1}".format(
ustr(error), traceback.format_exc())
msg = "Unknown error occurred when trying to collect extension events:{0}".format(
textutil.format_exception(error))
add_event(op=WALAEventOperation.ExtensionTelemetryEventProcessing, message=msg, is_success=False)
finally:
# Always ensure that the events directory are being deleted each run except when Telemetry Service is stopped,
Expand Down Expand Up @@ -204,8 +204,8 @@ def _capture_extension_events(self, handler_name, handler_event_dir_path):
# This is a trade-off between data replication vs data loss.
raise
except Exception as error:
msg = "Failed to process event file {0}: {1}, {2}".format(event_file, ustr(error),
traceback.format_exc())
msg = "Failed to process event file {0}:{1}".format(event_file,
textutil.format_exception(error))
logger.warn(msg)
add_log_event(level=logger.LogLevel.WARNING, message=msg, forced=True)
finally:
Expand Down
37 changes: 14 additions & 23 deletions azurelinuxagent/ga/exthandlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@
import re
import shutil
import stat
import sys
import tempfile
import time
import traceback
import zipfile
from collections import defaultdict
from functools import partial
Expand All @@ -49,6 +47,7 @@
from azurelinuxagent.common.future import ustr, is_file_not_found_error
from azurelinuxagent.common.protocol.restapi import ExtensionStatus, ExtensionSubStatus, ExtHandler, ExtHandlerStatus, \
VMStatus, GoalStateAggregateStatus, ExtensionState, ExtHandlerRequestedState, Extension
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.utils.archive import ARCHIVE_DIRECTORY_NAME
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION, \
Expand Down Expand Up @@ -113,14 +112,6 @@ class ExtCommandEnvVariable(object):
ExtensionSupportedFeatures = "{0}_EXTENSION_SUPPORTED_FEATURES".format(Prefix)


def get_traceback(e): # pylint: disable=R1710
if sys.version_info[0] == 3: # pylint: disable=R1705
return e.__traceback__
elif sys.version_info[0] == 2:
ex_type, ex, tb = sys.exc_info() # pylint: disable=W0612
return tb


def validate_has_key(obj, key, full_key_path):
if key not in obj:
raise ExtensionStatusError(msg="Invalid status format by extension: Missing {0} key".format(full_key_path),
Expand Down Expand Up @@ -331,7 +322,7 @@ def run(self):

activity_id, correlation_id, gs_creation_time = self.get_goal_state_debug_metadata()
except Exception as error:
msg = u"ProcessExtensionsInGoalState - Exception processing extension handlers: {0}\n{1}".format(ustr(error), traceback.extract_tb(get_traceback(error)))
msg = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(error))
logger.warn(msg)
add_event(op=WALAEventOperation.ExtensionProcessing, is_success=False, message=msg, log_event=False)
return
Expand All @@ -351,7 +342,7 @@ def goal_state_debug_info(duration=None):
self.__process_and_handle_extensions(etag)
self._cleanup_outdated_handlers()
except Exception as error:
error = u"ProcessExtensionsInGoalState - Exception processing extension handlers: {0}\n{1}".format(ustr(error), traceback.extract_tb(get_traceback(error)))
error = u"ProcessExtensionsInGoalState - Exception processing extension handlers:{0}".format(textutil.format_exception(error))
finally:
duration = elapsed_milliseconds(utc_start)
if error is None:
Expand Down Expand Up @@ -388,7 +379,7 @@ def __process_and_handle_extensions(self, etag):
code=GoalStateAggregateStatusCodes.Success,
message="GoalState executed successfully")
except Exception as error:
msg = "Unexpected error when processing goal state: {0}; {1}".format(ustr(error), traceback.format_exc())
msg = "Unexpected error when processing goal state:{0}".format(textutil.format_exception(error))
self.__gs_aggregate_status = GoalStateAggregateStatus(status=GoalStateStatus.Failed, seq_no=etag,
code=GoalStateAggregateStatusCodes.GoalStateUnknownFailure,
message=msg)
Expand Down Expand Up @@ -585,9 +576,9 @@ def wait_for_handler_completion(handler_i, wait_until, extension=None):
break
time.sleep(5)

except Exception:
except Exception as e:
msg = "Failed to wait for Handler completion due to unknown error. Marking the dependent extension as failed: {0}, {1}".format(
extension_name, traceback.format_exc())
extension_name, textutil.format_exception(e))
raise Exception(msg)

# In case of timeout or terminal error state, we log it and raise
Expand Down Expand Up @@ -969,7 +960,7 @@ def report_ext_handlers_status(self, incarnation_changed=False):
self.write_ext_handlers_status_to_info_file(vm_status)

except Exception as error:
msg = u"Failed to report status: {0}\n{1}".format(ustr(error), traceback.extract_tb(get_traceback(error)))
msg = u"Failed to report status: {0}".format(textutil.format_exception(error))
logger.warn(msg)
add_event(AGENT_NAME,
version=CURRENT_VERSION,
Expand Down Expand Up @@ -1788,11 +1779,11 @@ def get_extension_handler_statuses(self, handler_status, incarnation_changed):
ext_handler_status = ExtHandlerStatus()
set_properties("ExtHandlerStatus", ext_handler_status, get_properties(handler_status))
except Exception as error:
msg = "Something went wrong when trying to get a copy of the Handler status for {0}: {1}".format(
self.get_extension_full_name(), ustr(error))
msg = "Something went wrong when trying to get a copy of the Handler status for {0}".format(
self.get_extension_full_name())
self.report_error_on_incarnation_change(incarnation_changed, event_msg=msg,
log_msg="{0}.\nStack Trace: {1}".format(
msg, traceback.format_exc()))
msg, textutil.format_exception(error)))
# Since this is a Handler level error and we need to do it per extension, breaking here and logging
# error since we wont be able to report error anyways and saving it as a handler status (legacy behavior)
self.set_handler_status(message=msg, code=-1)
Expand All @@ -1806,11 +1797,11 @@ def get_extension_handler_statuses(self, handler_status, incarnation_changed):
ext_handler_statuses.append(ext_handler_status)
except ExtensionError as error:

msg = "Unknown error when trying to fetch status from extension {0}: {1}".format(
self.get_extension_full_name(ext), ustr(error))
msg = "Unknown error when trying to fetch status from extension {0}".format(
self.get_extension_full_name(ext))
self.report_error_on_incarnation_change(incarnation_changed, event_msg=msg,
log_msg="{0}.\nStack Trace: {1}".format(
msg, traceback.format_exc()),
msg, textutil.format_exception(error)),
extension=ext)

# Unexpected error, for single config, keep the behavior as is
Expand Down Expand Up @@ -2085,7 +2076,7 @@ def set_handler_status(self, status="NotReady", message="", code=0):
self.ext_handler.name, self.ext_handler.properties.version))
except (IOError, ValueError, ProtocolError) as error:
fileutil.clean_ioerror(error, paths=[status_file])
self.logger.error("Failed to save handler status: {0}, {1}", ustr(error), traceback.format_exc())
self.logger.error("Failed to save handler status: {0}", textutil.format_exception(error))

def get_handler_status(self):
state_dir = self.get_conf_dir()
Expand Down
4 changes: 2 additions & 2 deletions azurelinuxagent/ga/remoteaccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

import os
import os.path
import traceback
from datetime import datetime, timedelta

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION

Expand Down Expand Up @@ -57,7 +57,7 @@ def run(self):
self._remote_access = self._protocol.client.get_remote_access()
self._handle_remote_access()
except Exception as e:
msg = u"Exception processing goal state for remote access users: {0} {1}".format(ustr(e), traceback.format_exc())
msg = u"Exception processing goal state for remote access users: {0}".format(textutil.format_exception(e))
add_event(AGENT_NAME,
version=CURRENT_VERSION,
op=WALAEventOperation.RemoteAccessHandling,
Expand Down
9 changes: 4 additions & 5 deletions azurelinuxagent/ga/send_telemetry_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
import datetime
import threading
import time
import traceback

from azurelinuxagent.common import logger
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ServiceStoppedError
from azurelinuxagent.common.future import ustr, Queue, Empty
from azurelinuxagent.common.interfaces import ThreadHandlerInterface
from azurelinuxagent.common.utils import textutil


def get_send_telemetry_events_handler(protocol_util):
Expand Down Expand Up @@ -135,8 +135,8 @@ def _process_telemetry_thread(self):
self._send_events_in_queue(first_event)

except Exception as error:
err_msg = "An unknown error occurred in the {0} thread main loop, stopping thread. Error: {1}, Stack: {2}".format(
self.get_thread_name(), ustr(error), traceback.format_exc())
err_msg = "An unknown error occurred in the {0} thread main loop, stopping thread.{1}".format(
self.get_thread_name(), textutil.format_exception(error))
add_event(op=WALAEventOperation.UnhandledError, message=err_msg, is_success=False)

def _send_events_in_queue(self, first_event):
Expand All @@ -161,5 +161,4 @@ def _get_events_in_queue(self, first_event):
self._queue.task_done()
yield event
except Exception as error:
logger.error("Some exception when fetching event from queue: {0}, {1}".format(ustr(error),
traceback.format_exc()))
logger.error("Some exception when fetching event from queue: {0}".format(textutil.format_exception(error)))
Loading