Skip to content

Commit

Permalink
log the actual exceptions for metadata retry paths. (#1441)
Browse files Browse the repository at this point in the history
* Update issue templates (#1438)

Adding a BugReport template for gathering context better.

* run_get_output: log expected errors as info (#1437)

* run_get_output: log expected errors as info

* add extra checks

* python 3.4 compat

* python 3.4 compat

* log the actual exceptions for metadata retry paths. Catching non explicit exception types hides the problems.

* Ensure we dont accidentally process any response body during 304 in metadata server
  • Loading branch information
adepue authored and narrieta committed Jan 12, 2019
1 parent 669aeec commit bff7805
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 6 deletions.
23 changes: 23 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
name: Bug report
about: Create a report to help us improve
title: "[BUG]"
labels: triage
assignees: narrieta, pgombar, vrdmr

---

**Describe the bug**
A clear and concise description of what the bug is.

Note: Please add some context which would help us understand the problem better
1. Section of the log where the error occurs.
2. Serial console output
3. Steps to reproduce the behavior.

**Distro and WALinuxAgent details (please complete the following information):**
- Distro and Version: [e.g. Ubuntu 16.04]
- WALinuxAgent version [e.g. 2.2.34, you can copy the output of `waagent --version`, more info [here](https://github.com/Azure/WALinuxAgent/wiki/FAQ#what-does-goal-state-agent-mean-in-waagent---version-output) ]

**Additional context**
Add any other context about the problem here.
2 changes: 1 addition & 1 deletion azurelinuxagent/common/osutil/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_firewall_dropped_packets(self, dst_ip=None):
try:
wait = self.get_firewall_will_wait()

rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False)
rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False, expected_errors=[3])
if rc == 3:
# Transient error that we ignore. This code fires every loop
# of the daemon (60m), so we will get the value eventually.
Expand Down
27 changes: 24 additions & 3 deletions azurelinuxagent/common/protocol/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import os
import shutil
import re
import sys
import traceback

import azurelinuxagent.common.conf as conf
import azurelinuxagent.common.utils.fileutil as fileutil
Expand Down Expand Up @@ -51,6 +53,14 @@
RETRY_PING_INTERVAL = 10


def get_traceback(e):
if sys.version_info[0] == 3:
return e.__traceback__
elif sys.version_info[0] == 2:
ex_type, ex, tb = sys.exc_info()
return tb


def _add_content_type(headers):
if headers is None:
headers = {}
Expand Down Expand Up @@ -92,13 +102,20 @@ def _get_data(self, url, headers=None):
raise ProtocolError(ustr(e))

# NOT_MODIFIED (304) response means the call was successful, so allow that to proceed.
if restutil.request_failed(resp) and not restutil.request_not_modified(resp):
is_not_modified = restutil.request_not_modified(resp)
if restutil.request_failed(resp) and not is_not_modified:
raise ProtocolError("{0} - GET: {1}".format(resp.status, url))

data = resp.read()
etag = resp.getheader('ETag')

# If the response was 304, then explicilty set data to None
if is_not_modified:
data = None

if data is not None:
data = json.loads(ustr(data, encoding="utf-8"))

return data, etag

def _put_data(self, url, data, headers=None):
Expand Down Expand Up @@ -250,7 +267,7 @@ def get_ext_handlers(self, last_etag=None):
}
ext_list = ExtHandlerList()
data, etag = self._get_data(self.ext_uri, headers=headers)
if last_etag is None or last_etag < etag:
if last_etag is None or last_etag != etag:
set_properties("extensionHandlers", ext_list.extHandlers, data)
return ext_list, etag

Expand Down Expand Up @@ -314,8 +331,12 @@ def update_goal_state(self, forced=False, max_retry=3):
try:
self.update_certs()
return
except:
except Exception as e:
logger.verbose("Incarnation is out of date. Update goalstate.")
msg = u"Exception updating certs: {0}".format(ustr(e))
logger.warn(msg)
detailed_msg = '{0} {1}'.format(msg, traceback.extract_tb(get_traceback(e)))
logger.verbose(detailed_msg)
raise ProtocolError("Exceeded max retry updating goal state")

def download_ext_handler_pkg(self, uri, destination, headers=None, use_proxy=True):
Expand Down
7 changes: 5 additions & 2 deletions azurelinuxagent/common/utils/shellutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def run(cmd, chk_err=True):
return retcode


def run_get_output(cmd, chk_err=True, log_cmd=True):
def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]):
"""
Wrapper for subprocess.check_output.
Execute 'cmd'. Returns return code and STDOUT, trapping expected
Expand All @@ -97,7 +97,10 @@ def run_get_output(cmd, chk_err=True, log_cmd=True):
msg = u"Command: [{0}], " \
u"return code: [{1}], " \
u"result: [{2}]".format(cmd, e.returncode, output)
logger.error(msg)
if e.returncode in expected_errors:
logger.info(msg)
else:
logger.error(msg)
return e.returncode, output
except Exception as e:
if chk_err:
Expand Down
55 changes: 55 additions & 0 deletions tests/utils/test_shell_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,60 @@ def test_shellquote(self):
self.assertEqual("\'foo bar\'", shellutil.quote("foo bar"))
self.assertEqual("'foo'\\''bar'", shellutil.quote("foo\'bar"))

def test_it_should_log_command_failures_as_errors(self):
return_code = 99
command = "exit {0}".format(return_code)

with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger:
shellutil.run_get_output(command, log_cmd=False)

self.assertEquals(mock_logger.error.call_count, 1)

args, kwargs = mock_logger.error.call_args
message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []"
self.assertIn("[{0}]".format(command), message)
self.assertIn("[{0}]".format(return_code), message)

self.assertEquals(mock_logger.verbose.call_count, 0)
self.assertEquals(mock_logger.info.call_count, 0)
self.assertEquals(mock_logger.warn.call_count, 0)

def test_it_should_log_expected_errors_as_info(self):
return_code = 99
command = "exit {0}".format(return_code)

with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger:
shellutil.run_get_output(command, log_cmd=False, expected_errors=[return_code])

self.assertEquals(mock_logger.info.call_count, 1)

args, kwargs = mock_logger.info.call_args
message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []"
self.assertIn("[{0}]".format(command), message)
self.assertIn("[{0}]".format(return_code), message)

self.assertEquals(mock_logger.verbose.call_count, 0)
self.assertEquals(mock_logger.warn.call_count, 0)
self.assertEquals(mock_logger.error.call_count, 0)

def test_it_should_log_unexpected_errors_as_errors(self):
return_code = 99
command = "exit {0}".format(return_code)

with patch("azurelinuxagent.common.utils.shellutil.logger", autospec=True) as mock_logger:
shellutil.run_get_output(command, log_cmd=False, expected_errors=[return_code + 1])

self.assertEquals(mock_logger.error.call_count, 1)

args, kwargs = mock_logger.error.call_args
message = args[0] # message is similar to "Command: [exit 99], return code: [99], result: []"
self.assertIn("[{0}]".format(command), message)
self.assertIn("[{0}]".format(return_code), message)

self.assertEquals(mock_logger.info.call_count, 0)
self.assertEquals(mock_logger.verbose.call_count, 0)
self.assertEquals(mock_logger.warn.call_count, 0)


if __name__ == '__main__':
unittest.main()

0 comments on commit bff7805

Please sign in to comment.