Skip to content

Commit

Permalink
Don't test for agent log in between tests ETP (#2471)
Browse files Browse the repository at this point in the history
  • Loading branch information
larohra authored Jan 13, 2022
1 parent 2f6b446 commit 2f4b33e
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 12 deletions.
8 changes: 8 additions & 0 deletions dcr/scenario_utils/agent_log_parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import print_function

import os
import re
from datetime import datetime
Expand Down Expand Up @@ -37,6 +39,8 @@


class AgentLogRecord:
__ERROR_TAGS = ['Exception', 'Traceback', '[CGW]']

def __init__(self, match):
self.text = match.string
self.when = match.group("when")
Expand All @@ -48,6 +52,10 @@ def __init__(self, match):
def get_timestamp(self):
return datetime.strptime(self.when, u'%Y-%m-%dT%H:%M:%S.%fZ')

@property
def is_error(self):
return self.level in ('ERROR', 'WARNING') or any(err in self.text for err in self.__ERROR_TAGS)


def parse_agent_log_file(waagent_log_path=AGENT_LOG_FILE):
if not os.path.exists(waagent_log_path):
Expand Down
10 changes: 3 additions & 7 deletions dcr/scenario_utils/check_waagent_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None):
distro = "".join(get_distro())
systemd_enabled = is_systemd_distro()

error_tags = ['ERROR', 'Exception', 'Traceback', 'WARNING', '[CGW]']

#
# NOTES:
# * 'message' is matched using re.search; be sure to escape any regex metacharacters
Expand Down Expand Up @@ -80,7 +78,8 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None):
# Ignoring this error for Deb 8 as its not a blocker and since Deb 8 is old and not widely used
{
'message': r"journalctl: unrecognized option '--utc'",
'if': lambda log_line: re.match(r"(debian8\.11)\D*", distro, flags=re.IGNORECASE) is not None and log_line.level == "WARNING"
'if': lambda log_line: re.match(r"(debian8\.11)\D*", distro,
flags=re.IGNORECASE) is not None and log_line.level == "WARNING"
},
# 2021-07-09T01:46:53.307959Z INFO MonitorHandler ExtHandler [CGW] Disabling resource usage monitoring. Reason: Check on cgroups failed:
# [CGroupsException] The agent's cgroup includes unexpected processes: ['[PID: 2367] UNKNOWN']
Expand All @@ -104,16 +103,13 @@ def check_waagent_log_for_errors(waagent_log=AGENT_LOG_FILE, ignore=None):
if ignore is not None:
ignore_list.extend(ignore)

def is_error(log_line):
return any(err in log_line.text for err in error_tags)

def can_be_ignored(log_line):
return any(re.search(msg['message'], log_line.text) is not None and ('if' not in msg or msg['if'](log_line)) for msg in ignore_list)

errors = []

for agent_log_line in parse_agent_log_file(waagent_log):
if is_error(agent_log_line) and not can_be_ignored(agent_log_line):
if agent_log_line.is_error and not can_be_ignored(agent_log_line):
# Handle "/proc/net/route contains no routes" as a special case since it can take time for the
# primary interface to come up and we don't want to report transient errors as actual errors
if "/proc/net/route contains no routes" in agent_log_line.text:
Expand Down
4 changes: 4 additions & 0 deletions dcr/scenarios/extension-telemetry-pipeline/etp_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
from random import choice


def get_collect_telemetry_thread_name():
return "TelemetryEventsCollector"


def wait_for_extension_events_dir_empty(timeout=timedelta(minutes=2)):
# By ensuring events dir to be empty, we verify that the telemetry events collector has completed its run
event_dirs = glob.glob(os.path.join("/var/log/azure/", "*", "events"))
Expand Down
31 changes: 26 additions & 5 deletions dcr/scenarios/extension-telemetry-pipeline/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@
import random
import time

from dotenv import load_dotenv

from dcr.scenario_utils.agent_log_parser import parse_agent_log_file
from dcr.scenario_utils.check_waagent_log import is_data_in_waagent_log, check_waagent_log_for_errors
from dcr.scenario_utils.models import get_vm_data_from_env
from dcr.scenario_utils.test_orchestrator import TestFuncObj
from dcr.scenario_utils.test_orchestrator import TestOrchestrator
from etp_helpers import add_extension_events_and_get_count, wait_for_extension_events_dir_empty
from etp_helpers import add_extension_events_and_get_count, wait_for_extension_events_dir_empty, \
get_collect_telemetry_thread_name


def add_good_extension_events_and_verify():
Expand All @@ -22,7 +21,15 @@ def add_good_extension_events_and_verify():

# Sleep for a min to ensure that the TelemetryService has enough time to send events and report errors if any
time.sleep(60)
check_waagent_log_for_errors()
telemetry_event_collector_name = get_collect_telemetry_thread_name()
errors_reported = False
for agent_log_line in parse_agent_log_file():
if agent_log_line.thread == telemetry_event_collector_name and agent_log_line.is_error:
if not errors_reported:
print(
f"waagent.log contains the following errors emitted by the {telemetry_event_collector_name} thread (none expected):")
errors_reported = True
print(agent_log_line.text.rstrip())

for ext_name in ext_event_count:
good_count = ext_event_count[ext_name]['good']
Expand Down Expand Up @@ -65,12 +72,26 @@ def verify_etp_enabled():
raise AssertionError("Event directory not found for all extensions!")


def check_agent_log():
# Since we're injecting bad events in the add_bad_events_and_verify_count() function test,
# we expect some warnings to be emitted by the agent.
# We're already verifying if these warnings are being emitted properly in the specified test, so ignoring those here.
ignore = [
{
'message': r"Dropped events for Extension: Microsoft\.(OSTCExtensions.VMAccessForLinux|Azure.Extensions.CustomScript); Details:",
'if': lambda log_line: log_line.level == "WARNING" and log_line.thread == get_collect_telemetry_thread_name()
}
]
check_waagent_log_for_errors(ignore=ignore)


if __name__ == '__main__':
tests = [
TestFuncObj("Verify ETP enabled", verify_etp_enabled, raise_on_error=True, retry=3),
TestFuncObj("Add Good extension events and verify", add_good_extension_events_and_verify),
TestFuncObj("Add Bad extension events and verify", add_bad_events_and_verify_count),
TestFuncObj("Verify all events processed", wait_for_extension_events_dir_empty),
TestFuncObj("Check Agent log", check_agent_log),
]

test_orchestrator = TestOrchestrator("ETPTests-VM", tests=tests)
Expand Down

0 comments on commit 2f4b33e

Please sign in to comment.