From bff78050a760ca3e2b47ea71a7f2a594a70d260a Mon Sep 17 00:00:00 2001 From: Adam DePue <1170996+adepue@users.noreply.github.com> Date: Fri, 11 Jan 2019 16:17:29 -0800 Subject: [PATCH] log the actual exceptions for metadata retry paths. (#1441) * Update issue templates (#1438) Adding a BugReport template for gathering context better. * run_get_output: log expected errors as info (#1437) * run_get_output: log expected errors as info * add extra checks * python 3.4 compat * python 3.4 compat * log the actual exceptions for metadata retry paths. Catching non explicit exception types hides the problems. * Ensure we dont accidentally process any response body during 304 in metadata server --- .github/ISSUE_TEMPLATE/bug_report.md | 23 +++++++++ azurelinuxagent/common/osutil/default.py | 2 +- azurelinuxagent/common/protocol/metadata.py | 27 ++++++++-- azurelinuxagent/common/utils/shellutil.py | 7 ++- tests/utils/test_shell_util.py | 55 +++++++++++++++++++++ 5 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..e565290d78 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,23 @@ +--- +name: Bug report +about: Create a report to help us improve +title: "[BUG]" +labels: triage +assignees: narrieta, pgombar, vrdmr + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +Note: Please add some context which would help us understand the problem better +1. Section of the log where the error occurs. +2. Serial console output +3. Steps to reproduce the behavior. + +**Distro and WALinuxAgent details (please complete the following information):** + - Distro and Version: [e.g. Ubuntu 16.04] + - WALinuxAgent version [e.g. 2.2.34, you can copy the output of `waagent --version`, more info [here](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output) ] + +**Additional context** +Add any other context about the problem here. diff --git a/azurelinuxagent/common/osutil/default.py b/azurelinuxagent/common/osutil/default.py index 1b237814e4..cb4986cf08 100644 --- a/azurelinuxagent/common/osutil/default.py +++ b/azurelinuxagent/common/osutil/default.py @@ -114,7 +114,7 @@ def get_firewall_dropped_packets(self, dst_ip=None): try: wait = self.get_firewall_will_wait() - rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False) + rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False, expected_errors=[3]) if rc == 3: # Transient error that we ignore. This code fires every loop # of the daemon (60m), so we will get the value eventually. diff --git a/azurelinuxagent/common/protocol/metadata.py b/azurelinuxagent/common/protocol/metadata.py index f8aca05fbb..742ffd5861 100644 --- a/azurelinuxagent/common/protocol/metadata.py +++ b/azurelinuxagent/common/protocol/metadata.py @@ -21,6 +21,8 @@ import os import shutil import re +import sys +import traceback import azurelinuxagent.common.conf as conf import azurelinuxagent.common.utils.fileutil as fileutil @@ -51,6 +53,14 @@ RETRY_PING_INTERVAL = 10 +def get_traceback(e): + if sys.version_info[0] == 3: + return e.__traceback__ + elif sys.version_info[0] == 2: + ex_type, ex, tb = sys.exc_info() + return tb + + def _add_content_type(headers): if headers is None: headers = {} @@ -92,13 +102,20 @@ def _get_data(self, url, headers=None): raise ProtocolError(ustr(e)) # NOT_MODIFIED (304) response means the call was successful, so allow that to proceed. - if restutil.request_failed(resp) and not restutil.request_not_modified(resp): + is_not_modified = restutil.request_not_modified(resp) + if restutil.request_failed(resp) and not is_not_modified: raise ProtocolError("{0} - GET: {1}".format(resp.status, url)) data = resp.read() etag = resp.getheader('ETag') + + # If the response was 304, then explicilty set data to None + if is_not_modified: + data = None + if data is not None: data = json.loads(ustr(data, encoding="utf-8")) + return data, etag def _put_data(self, url, data, headers=None): @@ -250,7 +267,7 @@ def get_ext_handlers(self, last_etag=None): } ext_list = ExtHandlerList() data, etag = self._get_data(self.ext_uri, headers=headers) - if last_etag is None or last_etag < etag: + if last_etag is None or last_etag != etag: set_properties("extensionHandlers", ext_list.extHandlers, data) return ext_list, etag @@ -314,8 +331,12 @@ def update_goal_state(self, forced=False, max_retry=3): try: self.update_certs() return - except: + except Exception as e: logger.verbose("Incarnation is out of date. Update goalstate.") + msg = u"Exception updating certs: {0}".format(ustr(e)) + logger.warn(msg) + detailed_msg = '{0} {1}'.format(msg, traceback.extract_tb(get_traceback(e))) + logger.verbose(detailed_msg) raise ProtocolError("Exceeded max retry updating goal state") def download_ext_handler_pkg(self, uri, destination, headers=None, use_proxy=True): diff --git a/azurelinuxagent/common/utils/shellutil.py b/azurelinuxagent/common/utils/shellutil.py index 8b25b814c1..a17f8c27c9 100644 --- a/azurelinuxagent/common/utils/shellutil.py +++ b/azurelinuxagent/common/utils/shellutil.py @@ -73,7 +73,7 @@ def run(cmd, chk_err=True): return retcode -def run_get_output(cmd, chk_err=True, log_cmd=True): +def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]): """ Wrapper for subprocess.check_output. Execute 'cmd'. Returns return code and STDOUT, trapping expected @@ -97,7 +97,10 @@ def run_get_output(cmd, chk_err=True, log_cmd=True): msg = u"Command: [{0}], " \ u"return code: [{1}], " \ u"result: [{2}]".format(cmd, e.returncode, output) - logger.error(msg) + if e.returncode in expected_errors: + logger.info(msg) + else: + logger.error(msg) return e.returncode, output except Exception as e: if chk_err: diff --git a/tests/utils/test_shell_util.py b/tests/utils/test_shell_util.py index 1acc42283e..2d9b393965 100644 --- a/tests/utils/test_shell_util.py +++ b/tests/utils/test_shell_util.py @@ -40,5 +40,60 @@ def test_shellquote(self): self.assertEqual("\'foo bar\'", shellutil.quote("foo bar")) self.assertEqual("'foo'\\''bar'", shellutil.quote("foo\'bar")) + def test_it_should_log_command_failures_as_errors(self): + return_code = 99 + command = "exit {0}".format(return_code) + + with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger: + shellutil.run_get_output(command, log_cmd=False) + + self.assertEquals(mock_logger.error.call_count, 1) + + args, kwargs = mock_logger.error.call_args + message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []" + self.assertIn("[{0}]".format(command), message) + self.assertIn("[{0}]".format(return_code), message) + + self.assertEquals(mock_logger.verbose.call_count, 0) + self.assertEquals(mock_logger.info.call_count, 0) + self.assertEquals(mock_logger.warn.call_count, 0) + + def test_it_should_log_expected_errors_as_info(self): + return_code = 99 + command = "exit {0}".format(return_code) + + with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger: + shellutil.run_get_output(command, log_cmd=False, expected_errors=[return_code]) + + self.assertEquals(mock_logger.info.call_count, 1) + + args, kwargs = mock_logger.info.call_args + message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []" + self.assertIn("[{0}]".format(command), message) + self.assertIn("[{0}]".format(return_code), message) + + self.assertEquals(mock_logger.verbose.call_count, 0) + self.assertEquals(mock_logger.warn.call_count, 0) + self.assertEquals(mock_logger.error.call_count, 0) + + def test_it_should_log_unexpected_errors_as_errors(self): + return_code = 99 + command = "exit {0}".format(return_code) + + with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger: + shellutil.run_get_output(command, log_cmd=False, expected_errors=[return_code + 1]) + + self.assertEquals(mock_logger.error.call_count, 1) + + args, kwargs = mock_logger.error.call_args + message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []" + self.assertIn("[{0}]".format(command), message) + self.assertIn("[{0}]".format(return_code), message) + + self.assertEquals(mock_logger.info.call_count, 0) + self.assertEquals(mock_logger.verbose.call_count, 0) + self.assertEquals(mock_logger.warn.call_count, 0) + + if __name__ == '__main__': unittest.main()