From 48fe50eaf6228964653a2e60b47bbe293f9ba8cf Mon Sep 17 00:00:00 2001 From: Varad Meru Date: Wed, 13 Nov 2019 15:58:33 -0800 Subject: [PATCH 1/3] Updating gitignore to ignore virtual environment files. (#1693) * Updating .gitignore to ignore virtual environment files. --- .gitignore | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.gitignore b/.gitignore index 563e6e15a9..0a31340c2d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +*$py.class # Virtualenv py3env/ @@ -27,6 +28,7 @@ var/ # PyCharm .idea/ +.idea_modules/ # PyInstaller # Usually these files are written by a python script from a template @@ -72,3 +74,21 @@ bin/waagent2.0c # mac osx specific files .DS_Store + +### VirtualEnv template +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +pyvenv.cfg +.venv +pip-selfcheck.json + +# virtualenv +venv/ +ENV/ + +# dotenv +.env + +# pyenv +.python-version From 6b457b7f997a88f63f439a011f747e7f2c0eb909 Mon Sep 17 00:00:00 2001 From: Varad Meru Date: Wed, 13 Nov 2019 17:04:22 -0800 Subject: [PATCH 2/3] Renaming local goal_state_variable, adding tests and nit refactoring. (#1691) * Renaming local goal_state_variable, adding tests and nit refactoring. --- azurelinuxagent/common/protocol/wire.py | 6 +- tests/data/wire/ext_conf.xml | 2 +- tests/data/wire/ext_conf_autoupgrade.xml | 2 +- .../ext_conf_autoupgrade_internalversion.xml | 2 +- tests/data/wire/ext_conf_internalversion.xml | 2 +- .../wire/ext_conf_multiple_extensions.xml | 4 +- tests/data/wire/ext_conf_no_public.xml | 2 +- tests/data/wire/ext_conf_no_settings.xml | 2 +- tests/data/wire/ext_conf_sequencing.xml | 2 +- tests/data/wire/ext_conf_upgradeguid.xml | 2 +- tests/protocol/test_wire.py | 115 ++++++++++++------ 11 files changed, 92 insertions(+), 49 deletions(-) diff --git a/azurelinuxagent/common/protocol/wire.py b/azurelinuxagent/common/protocol/wire.py index 7f8759631f..93c3090093 100644 --- a/azurelinuxagent/common/protocol/wire.py +++ b/azurelinuxagent/common/protocol/wire.py @@ -891,11 +891,11 @@ def get_current_handlers(self): def get_ext_conf(self): if self.ext_conf is None: - goal_state = self.get_goal_state() - if goal_state.ext_uri is None: + local_goal_state = self.get_goal_state() + if local_goal_state.ext_uri is None: self.ext_conf = ExtensionsConfig(None) else: - local_file = EXT_CONF_FILE_NAME.format(goal_state.incarnation) + local_file = EXT_CONF_FILE_NAME.format(local_goal_state.incarnation) local_file = os.path.join(conf.get_lib_dir(), local_file) xml_text = self.fetch_cache(local_file) self.ext_conf = ExtensionsConfig(xml_text) diff --git a/tests/data/wire/ext_conf.xml b/tests/data/wire/ext_conf.xml index 508fb98251..3bafcef1d2 100644 --- a/tests/data/wire/ext_conf.xml +++ b/tests/data/wire/ext_conf.xml @@ -22,5 +22,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_autoupgrade.xml b/tests/data/wire/ext_conf_autoupgrade.xml index 1d6919e0d1..67e95c3d0e 100644 --- a/tests/data/wire/ext_conf_autoupgrade.xml +++ b/tests/data/wire/ext_conf_autoupgrade.xml @@ -24,5 +24,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml index 9b61556b41..d918482f2d 100644 --- a/tests/data/wire/ext_conf_autoupgrade_internalversion.xml +++ b/tests/data/wire/ext_conf_autoupgrade_internalversion.xml @@ -24,5 +24,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_internalversion.xml b/tests/data/wire/ext_conf_internalversion.xml index 9b61556b41..d918482f2d 100644 --- a/tests/data/wire/ext_conf_internalversion.xml +++ b/tests/data/wire/ext_conf_internalversion.xml @@ -24,5 +24,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_multiple_extensions.xml b/tests/data/wire/ext_conf_multiple_extensions.xml index 838decb714..c68f35eb2d 100644 --- a/tests/data/wire/ext_conf_multiple_extensions.xml +++ b/tests/data/wire/ext_conf_multiple_extensions.xml @@ -1,5 +1,5 @@ - + Prod @@ -70,7 +70,7 @@ - https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D + https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_no_public.xml b/tests/data/wire/ext_conf_no_public.xml index abbde800c0..d96b9f5356 100644 --- a/tests/data/wire/ext_conf_no_public.xml +++ b/tests/data/wire/ext_conf_no_public.xml @@ -42,5 +42,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK"}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_no_settings.xml b/tests/data/wire/ext_conf_no_settings.xml index df76d547f1..16972250df 100644 --- a/tests/data/wire/ext_conf_no_settings.xml +++ b/tests/data/wire/ext_conf_no_settings.xml @@ -37,5 +37,5 @@ -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_sequencing.xml b/tests/data/wire/ext_conf_sequencing.xml index 3120a979b7..bb215d0d57 100644 --- a/tests/data/wire/ext_conf_sequencing.xml +++ b/tests/data/wire/ext_conf_sequencing.xml @@ -31,4 +31,4 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/data/wire/ext_conf_upgradeguid.xml b/tests/data/wire/ext_conf_upgradeguid.xml index 1526a93999..e4c30daf42 100644 --- a/tests/data/wire/ext_conf_upgradeguid.xml +++ b/tests/data/wire/ext_conf_upgradeguid.xml @@ -22,5 +22,5 @@ {"runtimeSettings":[{"handlerSettings":{"protectedSettingsCertThumbprint":"4037FBF5F1F3014F99B5D6C7799E9B20E6871CB3","protectedSettings":"MIICWgYJK","publicSettings":{"foo":"bar"}}}]} -https://yuezhatest.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D +https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo diff --git a/tests/protocol/test_wire.py b/tests/protocol/test_wire.py index 71cc9692c9..ca162ba4f9 100644 --- a/tests/protocol/test_wire.py +++ b/tests/protocol/test_wire.py @@ -16,22 +16,34 @@ # Requires Python 2.6+ and Openssl 1.0+ # +import json +import os import stat import tempfile +import time import unittest import zipfile -from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam -from azurelinuxagent.common.protocol.wire import * +from azurelinuxagent.common import conf +from azurelinuxagent.common.exception import InvalidContainerError, ResourceGoneError, ProtocolError, \ + ExtensionDownloadError +from azurelinuxagent.common.future import httpclient +from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol +from azurelinuxagent.common.protocol.wire import WireProtocol, WireClient, GoalState, ExtensionsConfig, \ + InVMArtifactsProfile, VMAgentManifestUri, StatusBlob, VMStatus, INCARNATION_FILE_NAME, ExtHandlerVersionUri, \ + DataContractList, socket +from azurelinuxagent.common.telemetryevent import TelemetryEvent, TelemetryEventParam, TelemetryEventList +from azurelinuxagent.common.utils import fileutil, restutil from azurelinuxagent.common.utils.shellutil import run_get_output +from azurelinuxagent.common.version import CURRENT_VERSION, DISTRO_NAME, DISTRO_VERSION from tests.ga.test_monitor import random_generator from tests.protocol import mockwiredata -from tests.tools import ANY, AgentTestCase, MagicMock, Mock, patch, running_under_travis, skip_if_predicate_true +from tests.tools import ANY, MagicMock, Mock, patch, AgentTestCase, skip_if_predicate_true, running_under_travis data_with_bom = b'\xef\xbb\xbfhehe' testurl = 'http://foo' testtype = 'BlockBlob' -wireserver_url = '168.63.129.16' +WIRESERVER_URL = '168.63.129.16' def get_event(message, duration=30000, evt_type="", is_internal=False, is_success=True, @@ -61,7 +73,7 @@ def _test_getters(self, test_data, certsMustBePresent, __, MockCryptUtil, _): MockCryptUtil.side_effect = test_data.mock_crypt_util with patch.object(restutil, 'http_get', test_data.mock_http_get): - protocol = WireProtocol(wireserver_url) + protocol = WireProtocol(WIRESERVER_URL) protocol.detect() protocol.get_vminfo() protocol.get_certs() @@ -170,19 +182,18 @@ def test_call_storage_kwargs(self, *args): self.assertTrue(c == (True if i != 3 else False)) def test_status_blob_parsing(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).ext_conf) self.assertEqual(wire_protocol_client.ext_conf.status_upload_blob, - u'https://yuezhatest.blob.core.windows.net/vhds/test' - u'-cs12.test-cs12.test-cs12.status?sr=b&sp=rw&se' - u'=9999-01-01&sk=key1&sv=2014-02-14&sig' - u'=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo%3D') + 'https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?' + 'sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&' + 'sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo') self.assertEqual(wire_protocol_client.ext_conf.status_upload_blob_type, u'BlockBlob') pass def test_get_host_ga_plugin(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) with patch.object(WireClient, "get_goal_state", return_value=goal_state) as patch_get_goal_state: @@ -201,7 +212,7 @@ def test_download_ext_handler_pkg_fallback(self, patch_request, patch_get_host, patch_get_host.return_value = HostPluginProtocol(host_uri, 'container_id', 'role_config') patch_request.return_value = [host_uri, {}] - WireProtocol(wireserver_url).download_ext_handler_pkg(ext_uri, destination) + WireProtocol(WIRESERVER_URL).download_ext_handler_pkg(ext_uri, destination) self.assertEqual(patch_http.call_count, 2) self.assertEqual(patch_request.call_count, 1) @@ -214,7 +225,7 @@ def test_download_ext_handler_pkg_stream(self, *args): tmp = tempfile.mkdtemp() destination = os.path.join(tmp, 'test_download_ext_handler_pkg_stream.zip') - success = WireProtocol(wireserver_url).download_ext_handler_pkg(ext_uri, destination) + success = WireProtocol(WIRESERVER_URL).download_ext_handler_pkg(ext_uri, destination) self.assertTrue(success) self.assertTrue(os.path.exists(destination)) @@ -242,7 +253,7 @@ def test_upload_status_blob_default(self, *args): Default status blob method is HostPlugin. """ vmstatus = VMStatus(message="Ready", status="Ready") - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.status_upload_blob = testurl wire_protocol_client.ext_conf.status_upload_blob_type = testtype @@ -264,7 +275,7 @@ def test_upload_status_blob_default(self, *args): @patch("azurelinuxagent.common.protocol.wire.WireClient.update_goal_state") def test_upload_status_blob_host_ga_plugin(self, *args): vmstatus = VMStatus(message="Ready", status="Ready") - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.status_upload_blob = testurl wire_protocol_client.ext_conf.status_upload_blob_type = testtype @@ -291,7 +302,7 @@ def test_upload_status_blob_host_ga_plugin(self, *args): @patch("azurelinuxagent.common.protocol.hostplugin.HostPluginProtocol.ensure_initialized") def test_upload_status_blob_unknown_type_assumes_block(self, _, __, *args): vmstatus = VMStatus(message="Ready", status="Ready") - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.status_upload_blob = testurl wire_protocol_client.ext_conf.status_upload_blob_type = "NotALegalType" @@ -310,7 +321,7 @@ def test_upload_status_blob_unknown_type_assumes_block(self, _, __, *args): @patch("azurelinuxagent.common.protocol.wire.WireClient.update_goal_state") def test_upload_status_blob_reports_prepare_error(self, *args): vmstatus = VMStatus(message="Ready", status="Ready") - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.status_upload_blob = testurl wire_protocol_client.ext_conf.status_upload_blob_type = testtype @@ -322,7 +333,7 @@ def test_upload_status_blob_reports_prepare_error(self, *args): self.assertEqual(1, mock_prepare.call_count) def test_get_in_vm_artifacts_profile_blob_not_available(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) # Test when artifacts_profile_blob is null/None @@ -333,7 +344,7 @@ def test_get_in_vm_artifacts_profile_blob_not_available(self, *args): self.assertEqual(None, wire_protocol_client.get_artifacts_profile()) def test_get_in_vm_artifacts_profile_response_body_not_valid(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.artifacts_profile_blob = testurl goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) @@ -362,7 +373,7 @@ def test_get_in_vm_artifacts_profile_response_body_not_valid(self, *args): @patch("azurelinuxagent.common.event.add_event") def test_artifacts_profile_json_parsing(self, patch_event, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.artifacts_profile_blob = testurl goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) @@ -382,7 +393,7 @@ def test_artifacts_profile_json_parsing(self, patch_event, *args): self.assertEqual('ArtifactsProfileBlob', patch_event.call_args[1]['op']) def test_get_in_vm_artifacts_profile_default(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.artifacts_profile_blob = testurl goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) @@ -403,7 +414,7 @@ def test_fetch_manifest_fallback(self, *args): mock_host = HostPluginProtocol(host_uri, 'container_id', 'role_config') - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client with patch.object(WireClient, "fetch", return_value=None) as patch_fetch: with patch.object(WireClient, "get_host_plugin", return_value=mock_host): @@ -423,7 +434,7 @@ def test_fetch_manifest_ensure_manifest_uri_is_set(self, *args): uris.append(uri1) host_uri = 'host_uri' mock_host = HostPluginProtocol(host_uri, 'container_id', 'role_config') - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client manifest_return = "manifest.xml" with patch.object(WireClient, "get_host_plugin", return_value=mock_host): @@ -447,7 +458,7 @@ def test_fetch_manifest_ensure_manifest_uri_is_set(self, *args): self.assertTrue(HostPluginProtocol.is_default_channel()) def test_get_in_vm_artifacts_profile_host_ga_plugin(self, *args): - wire_protocol_client = WireProtocol(wireserver_url).client + wire_protocol_client = WireProtocol(WIRESERVER_URL).client wire_protocol_client.ext_conf = ExtensionsConfig(None) wire_protocol_client.ext_conf.artifacts_profile_blob = testurl goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) @@ -468,7 +479,7 @@ def test_report_vm_status(self, *args): status = 'status' message = 'message' - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client actual = StatusBlob(client=client) actual.set_vm_status(VMStatus(status=status, message=message)) timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) @@ -505,7 +516,7 @@ def test_send_event(self, mock_http_request, *args): mock_http_request.return_value = MockResponse("", 200) event_str = u'a test string' - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client client.send_event("foo", event_str) first_call = mock_http_request.call_args_list[0] @@ -521,7 +532,7 @@ def test_send_event(self, mock_http_request, *args): @patch("azurelinuxagent.common.protocol.wire.WireClient.send_event") def test_report_event_small_event(self, patch_send_event, *args): event_list = TelemetryEventList() - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client event_str = random_generator(10) event_list.events.append(get_event(message=event_str)) @@ -543,7 +554,7 @@ def test_report_event_small_event(self, patch_send_event, *args): @patch("azurelinuxagent.common.protocol.wire.WireClient.send_event") def test_report_event_multiple_events_to_fill_buffer(self, patch_send_event, *args): event_list = TelemetryEventList() - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client event_str = random_generator(2 ** 15) event_list.events.append(get_event(message=event_str)) @@ -559,7 +570,7 @@ def test_report_event_large_event(self, patch_send_event, *args): event_list = TelemetryEventList() event_str = random_generator(2 ** 18) event_list.events.append(get_event(message=event_str)) - client = WireProtocol(wireserver_url).client + client = WireProtocol(WIRESERVER_URL).client client.report_event(event_list) self.assertEqual(patch_send_event.call_count, 0) @@ -567,6 +578,38 @@ def test_report_event_large_event(self, patch_send_event, *args): class TestWireClient(AgentTestCase): + def test_get_ext_conf_without_uri(self, *args): + test_goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) + test_goal_state.ext_uri = None + wire_protocol_client = WireProtocol(WIRESERVER_URL).client + wire_protocol_client.get_goal_state = Mock(return_value=test_goal_state) + + with patch("azurelinuxagent.common.protocol.wire.WireClient.fetch_cache", + return_value=mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).ext_conf): + ext_conf = wire_protocol_client.get_ext_conf() + + self.assertEqual(0, len(ext_conf.ext_handlers.extHandlers)) + self.assertEqual(0, len(ext_conf.vmagent_manifests.vmAgentManifests)) + self.assertIsNone(ext_conf.status_upload_blob) + self.assertIsNone(ext_conf.status_upload_blob_type) + self.assertIsNone(ext_conf.artifacts_profile_blob) + + def test_get_ext_conf_with_uri(self, *args): + test_goal_state = GoalState(mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state) + wire_protocol_client = WireProtocol(WIRESERVER_URL).client + wire_protocol_client.get_goal_state = Mock(return_value=test_goal_state) + + with patch("azurelinuxagent.common.protocol.wire.WireClient.fetch_cache", + return_value=mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).ext_conf): + ext_conf = wire_protocol_client.get_ext_conf() + + self.assertEqual(1, len(ext_conf.ext_handlers.extHandlers)) + self.assertEqual(2, len(ext_conf.vmagent_manifests.vmAgentManifests)) + self.assertEqual("https://test.blob.core.windows.net/vhds/test-cs12.test-cs12.test-cs12.status?" + "sr=b&sp=rw&se=9999-01-01&sk=key1&sv=2014-02-14&sig=hfRh7gzUE7sUtYwke78IOlZOrTRCYvkec4hGZ9zZzXo", ext_conf.status_upload_blob) + self.assertEqual("BlockBlob", ext_conf.status_upload_blob_type) + self.assertEqual(None, ext_conf.artifacts_profile_blob) + def test_save_or_update_goal_state_should_save_new_goal_state_file(self): # Assert the file didn't exist before incarnation = 42 @@ -574,7 +617,7 @@ def test_save_or_update_goal_state_should_save_new_goal_state_file(self): self.assertFalse(os.path.exists(goal_state_file)) xml_text = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.save_or_update_goal_state_file(incarnation, xml_text) # Assert the file exists and its contents @@ -600,7 +643,7 @@ def test_save_or_update_goal_state_should_update_existing_goal_state_file(self): # Update the container id new_goal_state = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state.replace("c6d5526c-5ac2-4200-b6e2-56f2b70c5ab2", "z6d5526c-5ac2-4200-b6e2-56f2b70c5ab2") - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.save_or_update_goal_state_file(incarnation, new_goal_state) # Assert the file exists and its contents @@ -621,7 +664,7 @@ def test_save_or_update_goal_state_should_update_goal_state_and_container_id_whe with open(goal_state_file, "w") as f: f.write(xml_text) - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) host = client.get_host_plugin() old_container_id = host.container_id @@ -980,7 +1023,7 @@ def test_get_artifacts_profile_should_update_goal_state_and_not_change_default_c def test_send_request_using_appropriate_channel_should_not_invoke_host_channel_when_direct_channel_succeeds(self, *args): xml_text = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.goal_state = GoalState(xml_text) client.get_host_plugin().set_default_channel(False) @@ -1003,7 +1046,7 @@ def host_func(*args): def test_send_request_using_appropriate_channel_should_not_use_direct_channel_when_host_channel_is_default(self, *args): xml_text = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.goal_state = GoalState(xml_text) client.get_host_plugin().set_default_channel(True) @@ -1026,7 +1069,7 @@ def host_func(*args): def test_send_request_using_appropriate_channel_should_use_host_channel_when_direct_channel_fails(self, *args): xml_text = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.goal_state = GoalState(xml_text) host = client.get_host_plugin() host.set_default_channel(False) @@ -1052,7 +1095,7 @@ def host_func(*args): def test_send_request_using_appropriate_channel_should_retry_the_host_channel_after_reloading_goal_state(self, *args): xml_text = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE).goal_state - client = WireClient(wireserver_url) + client = WireClient(WIRESERVER_URL) client.goal_state = GoalState(xml_text) client.get_host_plugin().set_default_channel(False) From 5ef3e86b1c3a888ea54ef00b5dec6fae2f338e1c Mon Sep 17 00:00:00 2001 From: Varad Meru Date: Thu, 14 Nov 2019 14:20:14 -0800 Subject: [PATCH 3/3] Start sending PerformanceCounter metrics for Cgroups (#1651) * Sending PerformanceCounter metrics for cgroups. * Adding another test to validate the wireclient.report_event values. * Adding comments and tests for verifying period_warns * Cast the RAM, Processors, ImageOrigin to int. * Fix/Add the tests and move float casting inside report_metric. * Making sure all the required fields are getting populated, using test. * Adding casting and making the tests relying on cgroups better * Test issue: Incorrect attribute for patch & indentation issues resolved. * Skip cgroups tests for now. --- azurelinuxagent/common/cgroupstelemetry.py | 70 +++-- azurelinuxagent/common/event.py | 32 ++- azurelinuxagent/ga/monitor.py | 25 +- tests/common/test_cgroupstelemetry.py | 192 ++++++++------ tests/common/test_event.py | 37 ++- tests/ga/test_extension.py | 59 +++-- tests/ga/test_monitor.py | 285 ++++++++++++++++++--- 7 files changed, 521 insertions(+), 179 deletions(-) diff --git a/azurelinuxagent/common/cgroupstelemetry.py b/azurelinuxagent/common/cgroupstelemetry.py index 168b4d5d50..dcc586a3f2 100644 --- a/azurelinuxagent/common/cgroupstelemetry.py +++ b/azurelinuxagent/common/cgroupstelemetry.py @@ -15,11 +15,15 @@ # Requires Python 2.6+ and Openssl 1.0+ import errno import threading +from collections import namedtuple from datetime import datetime as dt from azurelinuxagent.common import logger -from azurelinuxagent.common.future import ustr from azurelinuxagent.common.exception import CGroupsException +from azurelinuxagent.common.future import ustr + + +MetricValue = namedtuple('Metric', ['category', 'counter', 'instance', 'value']) class CGroupsTelemetry(object): @@ -93,6 +97,16 @@ def stop_tracking(cgroup): @staticmethod def report_all_tracked(): + """ + The report_all_tracked's purpose is to collect the data from the tracked cgroups and process the metric into a + data structure by _process_cgroup_metric. The perf metric is added into the data structure and returned to the + caller. + + The report_all_tracked would be removed soon - in favor of sending report_metric directly, when polling the data + from tracked groups. + + :return collected_metrics: dictionary of cgroups metrics. + """ collected_metrics = {} for name, cgroup_metrics in CGroupsTelemetry._cgroup_metrics.items(): @@ -112,18 +126,43 @@ def report_all_tracked(): @staticmethod def poll_all_tracked(): + metrics = [] + with CGroupsTelemetry._rlock: for cgroup in CGroupsTelemetry._tracked[:]: - + # noinspection PyBroadException if cgroup.name not in CGroupsTelemetry._cgroup_metrics: CGroupsTelemetry._cgroup_metrics[cgroup.name] = CgroupMetrics() - - CGroupsTelemetry._cgroup_metrics[cgroup.name].collect_data(cgroup) - + try: + if cgroup.controller == "cpu": + current_cpu_usage = cgroup.get_cpu_usage() + CGroupsTelemetry._cgroup_metrics[cgroup.name].add_cpu_usage(current_cpu_usage) + metrics.append(MetricValue("Process", "% Processor Time", cgroup.name, current_cpu_usage)) + elif cgroup.controller == "memory": + current_memory_usage = cgroup.get_memory_usage() + CGroupsTelemetry._cgroup_metrics[cgroup.name].add_memory_usage(current_memory_usage) + metrics.append(MetricValue("Memory", "Total Memory Usage", cgroup.name, current_memory_usage)) + + max_memory_usage = cgroup.get_max_memory_usage() + CGroupsTelemetry._cgroup_metrics[cgroup.name].add_max_memory_usage(max_memory_usage) + metrics.append(MetricValue("Memory", "Max Memory Usage", cgroup.name, max_memory_usage)) + else: + raise CGroupsException('CGroup controller {0} is not supported for cgroup {1}'.format( + cgroup.controller, cgroup.name)) + except Exception as e: + # There can be scenarios when the CGroup has been deleted by the time we are fetching the values + # from it. This would raise IOError with file entry not found (ERRNO: 2). We do not want to log + # every occurrences of such case as it would be very verbose. We do want to log all the other + # exceptions which could occur, which is why we do a periodic log for all the other errors. + if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: + logger.periodic_warn(logger.EVERY_HOUR, '[PERIODIC] Could not collect metrics for cgroup ' + '{0}. Error : {1}'.format(cgroup.name, ustr(e))) if not cgroup.is_active(): CGroupsTelemetry.stop_tracking(cgroup) CGroupsTelemetry._cgroup_metrics[cgroup.name].marked_for_delete = True + return metrics + @staticmethod def prune_all_tracked(): with CGroupsTelemetry._rlock: @@ -145,19 +184,14 @@ def __init__(self): self._cpu_usage = Metric() self.marked_for_delete = False - def collect_data(self, cgroup): - # noinspection PyBroadException - try: - if cgroup.controller == "cpu": - self._cpu_usage.append(cgroup.get_cpu_usage()) - elif cgroup.controller == "memory": - self._memory_usage.append(cgroup.get_memory_usage()) - self._max_memory_usage.append(cgroup.get_max_memory_usage()) - else: - raise CGroupsException('CGroup controller {0} is not supported'.format(controller)) - except Exception as e: - if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT: - logger.periodic_warn(logger.EVERY_HALF_HOUR, 'Could not collect metrics for cgroup {0}. Error : {1}'.format(cgroup.path, ustr(e))) + def add_memory_usage(self, usage): + self._memory_usage.append(usage) + + def add_max_memory_usage(self, usage): + self._max_memory_usage.append(usage) + + def add_cpu_usage(self, usage): + self._cpu_usage.append(usage) def get_memory_usage(self): return self._memory_usage diff --git a/azurelinuxagent/common/event.py b/azurelinuxagent/common/event.py index 306faa443a..415332fe5b 100644 --- a/azurelinuxagent/common/event.py +++ b/azurelinuxagent/common/event.py @@ -35,6 +35,7 @@ _EVENT_MSG = "Event: name={0}, op={1}, message={2}, duration={3}" TELEMETRY_EVENT_PROVIDER_ID = "69B669B9-4AF8-4C50-BDC4-6006FA76E975" +TELEMETRY_METRICS_EVENT_ID = 4 # Store the last retrieved container id as an environment variable to be shared between threads for telemetry purposes CONTAINER_ID_ENV_VARIABLE = "AZURE_GUEST_AGENT_CONTAINER_ID" @@ -270,14 +271,15 @@ def add_event(self, name, op=WALAEventOperation.Unknown, is_success=True, durati def _add_event(self, duration, evt_type, is_internal, is_success, message, name, op, version, event_id): event = TelemetryEvent(event_id, TELEMETRY_EVENT_PROVIDER_ID) - event.parameters.append(TelemetryEventParam('Name', name)) + + event.parameters.append(TelemetryEventParam('Name', str(name))) event.parameters.append(TelemetryEventParam('Version', str(version))) - event.parameters.append(TelemetryEventParam('IsInternal', is_internal)) - event.parameters.append(TelemetryEventParam('Operation', op)) - event.parameters.append(TelemetryEventParam('OperationSuccess', is_success)) - event.parameters.append(TelemetryEventParam('Message', message)) - event.parameters.append(TelemetryEventParam('Duration', duration)) - event.parameters.append(TelemetryEventParam('ExtensionType', evt_type)) + event.parameters.append(TelemetryEventParam('IsInternal', bool(is_internal))) + event.parameters.append(TelemetryEventParam('Operation', str(op))) + event.parameters.append(TelemetryEventParam('OperationSuccess', bool(is_success))) + event.parameters.append(TelemetryEventParam('Message', str(message))) + event.parameters.append(TelemetryEventParam('Duration', int(duration))) + event.parameters.append(TelemetryEventParam('ExtensionType', str(evt_type))) self.add_default_parameters_to_event(event) data = get_properties(event) @@ -328,11 +330,11 @@ def add_metric(self, category, counter, instance, value, log_event=False): message = "Metric {0}/{1} [{2}] = {3}".format(category, counter, instance, value) _log_event(AGENT_NAME, "METRIC", message, 0) - event = TelemetryEvent(4, "69B669B9-4AF8-4C50-BDC4-6006FA76E975") - event.parameters.append(TelemetryEventParam('Category', category)) - event.parameters.append(TelemetryEventParam('Counter', counter)) - event.parameters.append(TelemetryEventParam('Instance', instance)) - event.parameters.append(TelemetryEventParam('Value', value)) + event = TelemetryEvent(TELEMETRY_METRICS_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID) + event.parameters.append(TelemetryEventParam('Category', str(category))) + event.parameters.append(TelemetryEventParam('Counter', str(counter))) + event.parameters.append(TelemetryEventParam('Instance', str(instance))) + event.parameters.append(TelemetryEventParam('Value', float(value))) self.add_default_parameters_to_event(event) data = get_properties(event) @@ -412,7 +414,11 @@ def report_metric(category, counter, instance, value, log_event=False, reporter= message = "Metric {0}/{1} [{2}] = {3}".format(category, counter, instance, value) _log_event(AGENT_NAME, "METRIC", message, 0) return - reporter.add_metric(category, counter, instance, value, log_event) + try: + reporter.add_metric(category, counter, instance, float(value), log_event) + except ValueError: + logger.periodic_warn(logger.EVERY_HALF_HOUR, "[PERIODIC] Cannot cast the metric value. Details of the Metric - " + "{0}/{1} [{2}] = {3}".format(category, counter, instance, value)) def add_event(name, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=str(CURRENT_VERSION), message="", diff --git a/azurelinuxagent/ga/monitor.py b/azurelinuxagent/ga/monitor.py index cbdbf8d279..42511dbadf 100644 --- a/azurelinuxagent/ga/monitor.py +++ b/azurelinuxagent/ga/monitor.py @@ -31,6 +31,7 @@ from azurelinuxagent.common.errorstate import ErrorState from azurelinuxagent.common.event import add_event, WALAEventOperation, CONTAINER_ID_ENV_VARIABLE, \ get_container_id_from_env +from azurelinuxagent.common.event import add_event, WALAEventOperation, report_metric from azurelinuxagent.common.exception import EventError, ProtocolError, OSUtilError, HttpError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.osutil import get_osutil @@ -178,8 +179,8 @@ def init_sysinfo(self): try: ram = self.osutil.get_total_mem() processors = self.osutil.get_processor_cores() - self.sysinfo.append(TelemetryEventParam("RAM", ram)) - self.sysinfo.append(TelemetryEventParam("Processors", processors)) + self.sysinfo.append(TelemetryEventParam("RAM", int(ram))) + self.sysinfo.append(TelemetryEventParam("Processors", int(processors))) except OSUtilError as e: logger.warn("Failed to get system info: {0}", ustr(e)) @@ -207,7 +208,7 @@ def init_sysinfo(self): self.sysinfo.append(TelemetryEventParam('VMId', vminfo.vmId)) self.sysinfo.append(TelemetryEventParam('ImageOrigin', - vminfo.image_origin)) + int(vminfo.image_origin))) except (HttpError, ValueError) as e: logger.warn("failed to get IMDS info: {0}", ustr(e)) @@ -281,7 +282,7 @@ def daemon(self): while self.should_run: self.send_telemetry_heartbeat() self.poll_telemetry_metrics() - self.send_telemetry_metrics() + self.send_telemetry_metrics() # This will be removed in favor of poll_telemetry_metrics() and it'll directly send the perf data for each cgroup. self.collect_and_send_events() self.send_host_plugin_heartbeat() self.send_imds_heartbeat() @@ -467,16 +468,30 @@ def send_telemetry_heartbeat(self): self.last_telemetry_heartbeat = datetime.datetime.utcnow() def poll_telemetry_metrics(self): + """ + This method polls the tracked cgroups to get data from the cgroups filesystem and send the data directly. + + :return: + """ time_now = datetime.datetime.utcnow() if not self.last_cgroup_polling_telemetry: self.last_cgroup_polling_telemetry = time_now if time_now >= (self.last_cgroup_polling_telemetry + MonitorHandler.CGROUP_TELEMETRY_POLLING_PERIOD): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() self.last_cgroup_polling_telemetry = time_now + if metrics: + for metric in metrics: + report_metric(metric.category, metric.counter, metric.instance, metric.value) + def send_telemetry_metrics(self): + """ + The send_telemetry_metrics would soon be removed in favor of sending performance metrics directly. + + :return: + """ time_now = datetime.datetime.utcnow() if not self.last_cgroup_report_telemetry: diff --git a/tests/common/test_cgroupstelemetry.py b/tests/common/test_cgroupstelemetry.py index 0556cd1b31..5b7df86999 100644 --- a/tests/common/test_cgroupstelemetry.py +++ b/tests/common/test_cgroupstelemetry.py @@ -24,7 +24,7 @@ from azurelinuxagent.common.cgroup import CGroup from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry, Metric -from azurelinuxagent.common.osutil.default import BASE_CGROUPS +from azurelinuxagent.common.osutil.default import BASE_CGROUPS, DefaultOSUtil from azurelinuxagent.common.protocol.restapi import ExtHandlerProperties, ExtHandler from azurelinuxagent.ga.exthandlers import ExtHandlerInstance from nose.plugins.attrib import attr @@ -32,6 +32,13 @@ are_cgroups_enabled, is_trusty_in_travis, i_am_root +def raise_ioerror(*_): + e = IOError() + from errno import EIO + e.errno = EIO + raise e + + def median(lst): data = sorted(lst) l_len = len(data) @@ -86,6 +93,19 @@ def _assert_cgroup_metrics_equal(self, cpu_usage, memory_usage, max_memory_usage self.assertListEqual(cgroup_metric.get_max_memory_usage()._data, max_memory_usage) self.assertListEqual(cgroup_metric.get_cpu_usage()._data, cpu_usage) + def _assert_cgroup_polling_metrics_equal(self, metrics, cpu_metric_value, memory_metric_value, max_memory_metric_value): + for metric in metrics: + self.assertIn(metric.category, ["Process", "Memory"]) + if metric.category == "Process": + self.assertEqual(metric.counter, "% Processor Time") + self.assertEqual(metric.value, cpu_metric_value) + if metric.category == "Memory": + self.assertIn(metric.counter, ["Total Memory Usage", "Max Memory Usage"]) + if metric.counter == "Total Memory Usage": + self.assertEqual(metric.value, memory_metric_value) + elif metric.counter == "Max Memory Usage": + self.assertEqual(metric.value, max_memory_metric_value) + @patch("azurelinuxagent.common.cgroup.CpuCgroup._get_current_cpu_total") @patch("azurelinuxagent.common.osutil.default.DefaultOSUtil.get_total_cpu_ticks_since_boot") def test_telemetry_polling_with_active_cgroups(self, *args): @@ -115,12 +135,14 @@ def test_telemetry_polling_with_active_cgroups(self, *args): poll_count = 1 for data_count in range(poll_count, 10): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions) self._assert_cgroup_metrics_equal( cpu_usage=[current_cpu] * data_count, memory_usage=[current_memory] * data_count, max_memory_usage=[current_max_memory] * data_count) + self.assertEqual(len(metrics), num_extensions * 3) + self._assert_cgroup_polling_metrics_equal(metrics, current_cpu, current_memory, current_max_memory) CGroupsTelemetry.report_all_tracked() @@ -159,7 +181,7 @@ def test_telemetry_polling_with_inactive_cgroups(self, *args): self.assertTrue(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i))) self.assertTrue(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i))) - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() for i in range(num_extensions): self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i))) @@ -170,6 +192,8 @@ def test_telemetry_polling_with_inactive_cgroups(self, *args): cpu_usage=[current_cpu] * data_count, memory_usage=[current_memory] * data_count, max_memory_usage=[current_max_memory] * data_count) + self.assertEqual(len(metrics), num_extensions * 3) + self._assert_cgroup_polling_metrics_equal(metrics, current_cpu, current_memory, current_max_memory) CGroupsTelemetry.report_all_tracked() @@ -349,7 +373,10 @@ def test_telemetry_calculations(self, *args): patch_get_cpu_percent.return_value = cpu_percent_values[i] patch_get_memory_usage.return_value = memory_usage_values[i] # example 200 MB patch_get_memory_max_usage.return_value = max_memory_usage_values[i] # example 450 MB - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() + self.assertEqual(len(metrics), 3 * num_extensions) + self._assert_cgroup_polling_metrics_equal(metrics, cpu_percent_values[i], memory_usage_values[i], + max_memory_usage_values[i]) collected_metrics = CGroupsTelemetry.report_all_tracked() for i in range(num_extensions): @@ -460,7 +487,8 @@ def test_process_cgroup_metric_with_incorrect_cgroups_mounted(self, *args): patch_get_memory_usage.side_effect = Exception("File not found") for data_count in range(1, 10): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() + self.assertEqual(len(metrics), 0) self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions) @@ -494,10 +522,12 @@ def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args): poll_count = 1 for data_count in range(poll_count, 10): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions) self._assert_cgroup_metrics_equal(cpu_usage=[current_cpu] * data_count, memory_usage=[], max_memory_usage=[]) + self.assertEqual(len(metrics), num_extensions * 1) # Only CPU populated + self._assert_cgroup_polling_metrics_equal(metrics, current_cpu, 0, 0) CGroupsTelemetry.report_all_tracked() @@ -533,30 +563,33 @@ def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args): poll_count = 1 for data_count in range(poll_count, 10): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions) self._assert_cgroup_metrics_equal( cpu_usage=[], memory_usage=[current_memory] * data_count, max_memory_usage=[current_max_memory] * data_count) + # Memory is only populated, CPU is not. Thus 2 metrics per cgroup. + self.assertEqual(len(metrics), num_extensions * 2) + self._assert_cgroup_polling_metrics_equal(metrics, 0, current_memory, current_max_memory) CGroupsTelemetry.report_all_tracked() self.assertEqual(len(CGroupsTelemetry._cgroup_metrics), num_extensions) self._assert_cgroup_metrics_equal([], [], []) - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") - @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage") - @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") + @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage", side_effect=raise_ioerror) + @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage", side_effect=raise_ioerror) @patch("azurelinuxagent.common.osutil.default.DefaultOSUtil.get_total_cpu_ticks_since_boot") - def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args): + def test_extension_telemetry_not_sent_for_empty_perf_metrics(self, *args): num_extensions = 5 for i in range(num_extensions): dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i)) CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup) - dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory", - "dummy_extension_{0}".format(i)) + dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), + "memory", "dummy_extension_{0}".format(i)) CGroupsTelemetry.track_cgroup(dummy_memory_cgroup) with patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry._process_cgroup_metric") as \ @@ -568,39 +601,39 @@ def test_extension_temetry_not_sent_for_empty_perf_metrics(self, *args): poll_count = 1 for data_count in range(poll_count, 10): - CGroupsTelemetry.poll_all_tracked() + metrics = CGroupsTelemetry.poll_all_tracked() + self.assertEqual(0, len(metrics)) collected_metrics = CGroupsTelemetry.report_all_tracked() self.assertEqual(0, len(collected_metrics)) + @skip_if_predicate_true(lambda: True, "Skipping this test currently: We need two different tests - one for " + "FileSystemCgroupAPI based test and one for SystemDCgroupAPI based test. @vrdmr will " + "be splitting this test in subsequent PRs") @skip_if_predicate_false(are_cgroups_enabled, "Does not run when Cgroups are not enabled") @skip_if_predicate_true(is_trusty_in_travis, "Does not run on Trusty in Travis") @attr('requires_sudo') - def test_telemetry_with_tracked_cgroup(self): + @patch("azurelinuxagent.common.cgroupconfigurator.get_osutil", return_value=DefaultOSUtil()) + @patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False) + def test_telemetry_with_tracked_cgroup(self, *_): self.assertTrue(i_am_root(), "Test does not run when non-root") + CGroupConfigurator._instance = None - # This test has some timing issues when systemd is managing cgroups, so we force the file system API - # by creating a new instance of the CGroupConfigurator - with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False): - cgroup_configurator_instance = CGroupConfigurator._instance - CGroupConfigurator._instance = None - - try: - max_num_polls = 30 - time_to_wait = 3 - extn_name = "foobar-1.0.0" - num_summarization_values = 7 + max_num_polls = 30 + time_to_wait = 3 + extn_name = "foobar-1.0.0" + num_summarization_values = 7 - cgs = make_new_cgroup(extn_name) - self.assertEqual(len(cgs), 2) + cgs = make_new_cgroup(extn_name) + self.assertEqual(len(cgs), 2) - ext_handler_properties = ExtHandlerProperties() - ext_handler_properties.version = "1.0.0" - self.ext_handler = ExtHandler(name='foobar') - self.ext_handler.properties = ext_handler_properties - self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None) + ext_handler_properties = ExtHandlerProperties() + ext_handler_properties.version = "1.0.0" + self.ext_handler = ExtHandler(name='foobar') + self.ext_handler.properties = ext_handler_properties + self.ext_handler_instance = ExtHandlerInstance(ext_handler=self.ext_handler, protocol=None) - command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", ''' + command = self.create_script("keep_cpu_busy_and_consume_memory_for_5_seconds", ''' nohup python -c "import time for i in range(5): @@ -610,59 +643,48 @@ def test_telemetry_with_tracked_cgroup(self): print('Test loop')" & '''.format(time_to_wait)) - self.log_dir = os.path.join(self.tmp_dir, "log") - - with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ - patch_get_base_dir: - with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ - patch_get_log_dir: - self.ext_handler_instance.launch_command(command) - - # - # If the test is made to run using the systemd API, then the paths of the cgroups need to be checked differently: - # - # self.assertEquals(len(CGroupsTelemetry._tracked), 2) - # cpu = os.path.join(BASE_CGROUPS, "cpu", "system.slice", r"foobar_1.0.0_.*\.scope") - # self.assertTrue(any(re.match(cpu, tracked.path) for tracked in CGroupsTelemetry._tracked)) - # memory = os.path.join(BASE_CGROUPS, "memory", "system.slice", r"foobar_1.0.0_.*\.scope") - # self.assertTrue(any(re.match(memory, tracked.path) for tracked in CGroupsTelemetry._tracked)) - # - self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( - BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) - self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( - BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) - - for i in range(max_num_polls): - CGroupsTelemetry.poll_all_tracked() - time.sleep(0.5) + self.log_dir = os.path.join(self.tmp_dir, "log") - collected_metrics = CGroupsTelemetry.report_all_tracked() + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ + patch_get_base_dir: + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ + patch_get_log_dir: + self.ext_handler_instance.launch_command(command) + + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) + + for i in range(max_num_polls): + CGroupsTelemetry.poll_all_tracked() + time.sleep(0.5) + + collected_metrics = CGroupsTelemetry.report_all_tracked() - self.assertIn("memory", collected_metrics[extn_name]) - self.assertIn("cur_mem", collected_metrics[extn_name]["memory"]) - self.assertIn("max_mem", collected_metrics[extn_name]["memory"]) - self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values) - self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values) - - self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str) - - self.assertIn("cpu", collected_metrics[extn_name]) - self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"]) - self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values) - - self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str) - self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str) - - for i in range(5): - self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0) - self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0) - self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0) - # Equal because CPU could be zero for minimum value. - finally: - CGroupConfigurator._instance = cgroup_configurator_instance + self.assertIn("memory", collected_metrics[extn_name]) + self.assertIn("cur_mem", collected_metrics[extn_name]["memory"]) + self.assertIn("max_mem", collected_metrics[extn_name]["memory"]) + self.assertEqual(len(collected_metrics[extn_name]["memory"]["cur_mem"]), num_summarization_values) + self.assertEqual(len(collected_metrics[extn_name]["memory"]["max_mem"]), num_summarization_values) + + self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["cur_mem"][6], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["memory"]["max_mem"][6], str) + + self.assertIn("cpu", collected_metrics[extn_name]) + self.assertIn("cur_cpu", collected_metrics[extn_name]["cpu"]) + self.assertEqual(len(collected_metrics[extn_name]["cpu"]["cur_cpu"]), num_summarization_values) + + self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][5], str) + self.assertIsInstance(collected_metrics[extn_name]["cpu"]["cur_cpu"][6], str) + + for i in range(5): + self.assertGreater(collected_metrics[extn_name]["memory"]["cur_mem"][i], 0) + self.assertGreater(collected_metrics[extn_name]["memory"]["max_mem"][i], 0) + self.assertGreaterEqual(collected_metrics[extn_name]["cpu"]["cur_cpu"][i], 0) + # Equal because CPU could be zero for minimum value. class TestMetric(AgentTestCase): diff --git a/tests/common/test_event.py b/tests/common/test_event.py index 9537b5ea47..9f0f77474f 100644 --- a/tests/common/test_event.py +++ b/tests/common/test_event.py @@ -24,7 +24,7 @@ from azurelinuxagent.common import event, logger from azurelinuxagent.common.event import add_event, \ - WALAEventOperation, elapsed_milliseconds + WALAEventOperation, elapsed_milliseconds, report_metric from azurelinuxagent.common.exception import EventError from azurelinuxagent.common.future import ustr from azurelinuxagent.common.protocol.wire import GoalState @@ -437,19 +437,50 @@ def test_elapsed_milliseconds(self): utc_start = datetime.utcnow() + timedelta(days=1) self.assertEqual(0, elapsed_milliseconds(utc_start)) + +class TestMetrics(AgentTestCase): @patch('azurelinuxagent.common.event.EventLogger.save_event') def test_report_metric(self, mock_event): event.report_metric("cpu", "%idle", "_total", 10.0) self.assertEqual(1, mock_event.call_count) event_json = mock_event.call_args[0][0] - self.assertIn("69B669B9-4AF8-4C50-BDC4-6006FA76E975", event_json) + self.assertIn(event.TELEMETRY_EVENT_PROVIDER_ID, event_json) self.assertIn("%idle", event_json) import json event_dictionary = json.loads(event_json) - self.assertEqual(event_dictionary['providerId'], "69B669B9-4AF8-4C50-BDC4-6006FA76E975") + self.assertEqual(event_dictionary['providerId'], event.TELEMETRY_EVENT_PROVIDER_ID) for parameter in event_dictionary["parameters"]: if parameter['name'] == 'Counter': self.assertEqual(parameter['value'], '%idle') break else: self.fail("Counter '%idle' not found in event parameters: {0}".format(repr(event_dictionary))) + + def test_save_metric(self): + category_present, counter_present, instance_present, value_present = False, False, False, False + report_metric("DummyCategory", "DummyCounter", "DummyInstance", 100) + self.assertTrue(len(os.listdir(self.tmp_dir)) == 1) + + # checking the extension of the file created. + for filename in os.listdir(self.tmp_dir): + self.assertEqual(".tld", filename[-4:]) + perf_metric_event = json.loads(fileutil.read_file(os.path.join(self.tmp_dir, filename))) + self.assertEqual(perf_metric_event["eventId"], event.TELEMETRY_METRICS_EVENT_ID) + self.assertEqual(perf_metric_event["providerId"], event.TELEMETRY_EVENT_PROVIDER_ID) + for i in perf_metric_event["parameters"]: + self.assertIn(i["name"], ["Category", "Counter", "Instance", "Value", "GAVersion", "ContainerId", + "OpcodeName", "EventTid", "EventPid", "TaskName", "KeywordName"]) + if i["name"] == "Category": + self.assertEqual(i["value"], "DummyCategory") + category_present = True + if i["name"] == "Counter": + self.assertEqual(i["value"], "DummyCounter") + counter_present = True + if i["name"] == "Instance": + self.assertEqual(i["value"], "DummyInstance") + instance_present = True + if i["name"] == "Value": + self.assertEqual(i["value"], 100) + value_present = True + + self.assertTrue(category_present and counter_present and instance_present and value_present) diff --git a/tests/ga/test_extension.py b/tests/ga/test_extension.py index 0efc6de924..1868f52b5d 100644 --- a/tests/ga/test_extension.py +++ b/tests/ga/test_extension.py @@ -24,7 +24,7 @@ from nose.plugins.attrib import attr from tests.protocol import mockwiredata from tests.tools import are_cgroups_enabled, AgentTestCase, data_dir, i_am_root, MagicMock, Mock, \ - skip_if_predicate_false, patch + skip_if_predicate_false, patch, is_trusty_in_travis, skip_if_predicate_true from azurelinuxagent.common.exception import ResourceGoneError from azurelinuxagent.common.protocol.restapi import Extension, ExtHandlerProperties @@ -2222,38 +2222,53 @@ def test_ext_handler_with_cgroup_enabled(self, *args): exthandlers_handler.run() self._assert_no_handler_status(protocol.report_vm_status) + @skip_if_predicate_true(lambda: True, "Skipping this test currently: We need two different tests - one for " + "FileSystemCgroupAPI based test and one for SystemDCgroupAPI based test. @vrdmr will " + "be splitting this test in subsequent PRs") + @skip_if_predicate_true(is_trusty_in_travis, "Does not run on Trusty in Travis as CPU cgroup is not mounted") + @patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_event') @attr('requires_sudo') - def test_ext_handler_and_monitor_handler_with_cgroup_enabled(self, patch_add_event, *args): + def test_ext_handler_and_monitor_handler_with_cgroup_enabled(self, patch_add_event, patch_add_metric, *args): self.assertTrue(i_am_root(), "Test does not run when non-root") - test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) - exthandlers_handler, monitor_handler, protocol = self._create_mock(test_data, *args) + # This test has some timing issues when systemd is managing cgroups, so we force the file system API + # by creating a new instance of the CGroupConfigurator + with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False): + cgroup_configurator_instance = CGroupConfigurator._instance + CGroupConfigurator._instance = None - monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) - monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + try: + test_data = mockwiredata.WireProtocolData(mockwiredata.DATA_FILE) + exthandlers_handler, monitor_handler, protocol = self._create_mock(test_data, *args) - # Test enable scenario. - exthandlers_handler.run() - self._assert_handler_status(protocol.report_vm_status, "Ready", 1, "1.0.0") - self._assert_ext_status(protocol.report_ext_status, "success", 0) + monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + + # Test enable scenario. + exthandlers_handler.run() + self._assert_handler_status(protocol.report_vm_status, "Ready", 1, "1.0.0") + self._assert_ext_status(protocol.report_ext_status, "success", 0) - monitor_handler.poll_telemetry_metrics() - monitor_handler.send_telemetry_metrics() + monitor_handler.poll_telemetry_metrics() + monitor_handler.send_telemetry_metrics() - self.assertEqual(patch_add_event.call_count, 4) + self.assertEqual(patch_add_event.call_count, 5) + self.assertEqual(patch_add_metric.call_count, 3) - name = patch_add_event.call_args[0][0] - fields = patch_add_event.call_args[1] + name = patch_add_event.call_args[0][0] + fields = patch_add_event.call_args[1] - self.assertEqual(name, "WALinuxAgent") - self.assertEqual(fields["op"], "ExtensionMetricsData") - self.assertEqual(fields["is_success"], True) - self.assertEqual(fields["log_event"], False) - self.assertEqual(fields["is_internal"], False) - self.assertIsInstance(fields["message"], ustr) + self.assertEqual(name, "WALinuxAgent") + self.assertEqual(fields["op"], "ExtensionMetricsData") + self.assertEqual(fields["is_success"], True) + self.assertEqual(fields["log_event"], False) + self.assertEqual(fields["is_internal"], False) + self.assertIsInstance(fields["message"], ustr) - monitor_handler.stop() + monitor_handler.stop() + finally: + CGroupConfigurator._instance = cgroup_configurator_instance @attr('requires_sudo') def test_ext_handler_with_systemd_cgroup_enabled(self, *args): diff --git a/tests/ga/test_monitor.py b/tests/ga/test_monitor.py index 4523694fb9..bef5fbced8 100644 --- a/tests/ga/test_monitor.py +++ b/tests/ga/test_monitor.py @@ -28,26 +28,29 @@ from nose.plugins.attrib import attr -from azurelinuxagent.common import logger -from azurelinuxagent.common.cgroup import CGroup -from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry import azurelinuxagent.common.conf as conf +from azurelinuxagent.common import event, logger +from azurelinuxagent.common.cgroup import CGroup, CpuCgroup +from azurelinuxagent.common.cgroupconfigurator import CGroupConfigurator +from azurelinuxagent.common.cgroupstelemetry import CGroupsTelemetry, MetricValue from azurelinuxagent.common.datacontract import get_properties -from azurelinuxagent.common.event import EventLogger, WALAEventOperation, CONTAINER_ID_ENV_VARIABLE +from azurelinuxagent.common.event import CONTAINER_ID_ENV_VARIABLE, EventLogger, WALAEventOperation from azurelinuxagent.common.exception import HttpError from azurelinuxagent.common.future import ustr +from azurelinuxagent.common.osutil.default import BASE_CGROUPS, DefaultOSUtil from azurelinuxagent.common.protocol.imds import ComputeInfo from azurelinuxagent.common.protocol.restapi import VMInfo -from azurelinuxagent.common.protocol.wire import WireProtocol +from azurelinuxagent.common.protocol.wire import ExtHandler, ExtHandlerProperties, WireProtocol from azurelinuxagent.common.telemetryevent import TelemetryEventParam, TelemetryEvent from azurelinuxagent.common.utils import restutil, fileutil from azurelinuxagent.common.version import AGENT_VERSION, CURRENT_VERSION, AGENT_NAME, CURRENT_AGENT +from azurelinuxagent.ga.exthandlers import ExtHandlerInstance from azurelinuxagent.ga.monitor import parse_xml_event, get_monitor_handler, MonitorHandler, \ generate_extension_metrics_telemetry_dictionary, parse_json_event -from tests.common.test_cgroupstelemetry import make_new_cgroup, consume_cpu_time, consume_memory +from tests.common.test_cgroupstelemetry import make_new_cgroup from tests.protocol.mockwiredata import WireProtocolData, DATA_FILE from tests.tools import Mock, MagicMock, patch, load_data, AgentTestCase, data_dir, are_cgroups_enabled, \ - i_am_root, skip_if_predicate_false + i_am_root, skip_if_predicate_false, is_trusty_in_travis, skip_if_predicate_true class ResponseMock(Mock): @@ -574,6 +577,9 @@ def test_collect_and_send_events_should_prepare_all_fields_for_all_event_files(s message="Heartbeat", log_event=False) + # Add agent event file + self.event_logger.add_metric("Process", "% Processor Time", "walinuxagent.service", 10) + # Add extension event file the way extension do it, by dropping a .tld file in the events folder source_file = os.path.join(data_dir, "ext/dsc_event.json") dest_file = os.path.join(conf.get_lib_dir(), "events", "dsc_event.tld") @@ -584,18 +590,24 @@ def test_collect_and_send_events_should_prepare_all_fields_for_all_event_files(s monitor_handler.collect_and_send_events() telemetry_events_list = patch_report_event.call_args_list[0][0][0] - self.assertEqual(len(telemetry_events_list.events), 2) + self.assertEqual(len(telemetry_events_list.events), 3) for event in telemetry_events_list.events: # All sysinfo parameters coming from the agent have to be present in the telemetry event to be emitted for param in monitor_handler.sysinfo: self.assertTrue(param in event.parameters) - # The container id is a special parameter that is not a part of the static sysinfo parameter list. + # The container id, GAVersion are special parameters that are not a part of the static sysinfo parameter + # list. + # The container id value is obtained from the goal state and must be present in all telemetry events. container_id_param = TelemetryEventParam("ContainerId", protocol.client.goal_state.container_id) self.assertTrue(container_id_param in event.parameters) + # Same for GAVersion + container_id_param = TelemetryEventParam("GAVersion", CURRENT_AGENT) + self.assertTrue(container_id_param in event.parameters) + @patch("azurelinuxagent.common.protocol.wire.WireClient.send_event") @patch("azurelinuxagent.common.conf.get_lib_dir") def test_collect_and_send_events(self, mock_lib_dir, patch_send_event, *args): @@ -816,20 +828,30 @@ def test_collect_and_send_with_call_wireserver_returns_http_error(self, mock_lib @patch('azurelinuxagent.common.osutil.get_osutil') @patch('azurelinuxagent.common.protocol.get_protocol_util') -@patch('azurelinuxagent.common.protocol.util.ProtocolUtil.get_protocol') @patch("azurelinuxagent.common.protocol.healthservice.HealthService._report") +@patch('azurelinuxagent.common.protocol.util.ProtocolUtil.get_protocol', return_value=WireProtocol('endpoint')) @patch("azurelinuxagent.common.utils.restutil.http_get") class TestExtensionMetricsDataTelemetry(AgentTestCase): def setUp(self): AgentTestCase.setUp(self) + event.init_event_logger(os.path.join(self.tmp_dir, "events")) CGroupsTelemetry.reset() + def tearDown(self): + AgentTestCase.tearDown(self) + CGroupsTelemetry.reset() + + @patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_event') @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.report_all_tracked") def test_send_extension_metrics_telemetry(self, patch_report_all_tracked, patch_poll_all_tracked, patch_add_event, - *args): + patch_add_metric, *args): + patch_poll_all_tracked.return_value = [MetricValue("Process", "% Processor Time", 1, 1), + MetricValue("Memory", "Total Memory Usage", 1, 1), + MetricValue("Memory", "Max Memory Usage", 1, 1)] + patch_report_all_tracked.return_value = { "memory": { "cur_mem": [1, 1, 1, 1, 1, str(datetime.datetime.utcnow()), str(datetime.datetime.utcnow())], @@ -849,14 +871,17 @@ def test_send_extension_metrics_telemetry(self, patch_report_all_tracked, patch_ self.assertEqual(1, patch_poll_all_tracked.call_count) self.assertEqual(1, patch_report_all_tracked.call_count) self.assertEqual(1, patch_add_event.call_count) + self.assertEqual(3, patch_add_metric.call_count) # Three metrics being sent. monitor_handler.stop() + @patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_event') @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.poll_all_tracked") @patch("azurelinuxagent.common.cgroupstelemetry.CGroupsTelemetry.report_all_tracked", return_value={}) def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_report_all_tracked, patch_poll_all_tracked, - patch_add_event, *args): + patch_add_event, patch_add_metric,*args): patch_report_all_tracked.return_value = {} + patch_poll_all_tracked.return_value = [] monitor_handler = get_monitor_handler() monitor_handler.init_protocols() @@ -867,46 +892,240 @@ def test_send_extension_metrics_telemetry_for_empty_cgroup(self, patch_report_al self.assertEqual(1, patch_poll_all_tracked.call_count) self.assertEqual(1, patch_report_all_tracked.call_count) self.assertEqual(0, patch_add_event.call_count) + self.assertEqual(0, patch_add_metric.call_count) + monitor_handler.stop() + + @patch('azurelinuxagent.common.event.EventLogger.add_metric') + @patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") + @patch('azurelinuxagent.common.logger.Logger.periodic_warn') + def test_send_extension_metrics_telemetry_handling_memory_cgroup_exceptions_errno2(self, patch_periodic_warn, + patch_get_memory_usage, + patch_add_metric, *args): + ioerror = IOError() + ioerror.errno = 2 + patch_get_memory_usage.side_effect = ioerror + + CGroupsTelemetry._tracked.append(CpuCgroup("cgroup_name", "/test/path")) + + monitor_handler = get_monitor_handler() + monitor_handler.init_protocols() + monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.poll_telemetry_metrics() + self.assertEqual(0, patch_periodic_warn.call_count) + self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. + monitor_handler.stop() + + @patch('azurelinuxagent.common.event.EventLogger.add_metric') + @patch("azurelinuxagent.common.cgroup.CpuCgroup.get_cpu_usage") + @patch('azurelinuxagent.common.logger.Logger.periodic_warn') + def test_send_extension_metrics_telemetry_handling_cpu_cgroup_exceptions_errno2(self, patch_periodic_warn, + patch_cpu_usage, patch_add_metric, + *args): + ioerror = IOError() + ioerror.errno = 2 + patch_cpu_usage.side_effect = ioerror + + CGroupsTelemetry._tracked.append(CpuCgroup("cgroup_name", "/test/path")) + + monitor_handler = get_monitor_handler() + monitor_handler.init_protocols() + monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.poll_telemetry_metrics() + self.assertEqual(0, patch_periodic_warn.call_count) + self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. monitor_handler.stop() + @patch('azurelinuxagent.common.event.EventLogger.add_metric') + @patch('azurelinuxagent.common.logger.Logger.periodic_warn') + def test_send_extension_metrics_telemetry_for_unsupported_cgroup(self, patch_periodic_warn, patch_add_metric, *args): + CGroupsTelemetry._tracked.append(CGroup("cgroup_name", "/test/path", "io")) + + monitor_handler = get_monitor_handler() + monitor_handler.init_protocols() + monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.poll_telemetry_metrics() + self.assertEqual(1, patch_periodic_warn.call_count) + self.assertEqual(0, patch_add_metric.call_count) # No metrics should be sent. + + monitor_handler.stop() + + @skip_if_predicate_true(lambda: True, "Skipping this test currently: We need two different tests - one for " + "FileSystemCgroupAPI based test and one for SystemDCgroupAPI based test. @vrdmr will " + "be splitting this test in subsequent PRs") @skip_if_predicate_false(are_cgroups_enabled, "Does not run when Cgroups are not enabled") + @skip_if_predicate_true(is_trusty_in_travis, "Does not run on Trusty in Travis") + @patch('azurelinuxagent.common.event.EventLogger.add_metric') @patch('azurelinuxagent.common.event.EventLogger.add_event') @attr('requires_sudo') - def test_send_extension_metrics_telemetry_with_actual_cgroup(self, patch_add_event, *args): + def test_send_extension_metrics_telemetry_with_actual_cgroup(self, patch_add_event, patch_add_metric, *arg): self.assertTrue(i_am_root(), "Test does not run when non-root") - num_polls = 5 - name = "test-cgroup" + # This test has some timing issues when systemd is managing cgroups, so we force the file system API + # by creating a new instance of the CGroupConfigurator + with patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False): + cgroup_configurator_instance = CGroupConfigurator._instance + CGroupConfigurator._instance = None + + try: + max_num_polls = 5 + time_to_wait = 3 + extn_name = "foobar-1.0.0" + + cgs = make_new_cgroup(extn_name) + self.assertEqual(len(cgs), 2) + + ext_handler_properties = ExtHandlerProperties() + ext_handler_properties.version = "1.0.0" + ext_handler = ExtHandler(name='foobar') + ext_handler.properties = ext_handler_properties + ext_handler_instance = ExtHandlerInstance(ext_handler=ext_handler, protocol=None) + ext_handler_instance.set_operation("Enable") + + monitor_handler = get_monitor_handler() + monitor_handler.init_protocols() + monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + + command = self.create_script("keep_cpu_busy_and_consume_memory_for_{0}_seconds".format(time_to_wait), ''' +nohup python -c "import time + +for i in range(5): + x = [1, 2, 3, 4, 5] * (i * 1000) + time.sleep({0}) + x = [1, 2, 3, 4, 5] * (i * 1000) + x *= 0 + print('Test loop')" & +'''.format(time_to_wait)) + + self.log_dir = os.path.join(self.tmp_dir, "log") + + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ + patch_get_base_dir: + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ + patch_get_log_dir: + ext_handler_instance.launch_command(command) + + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) + + for i in range(max_num_polls): + metrics = CGroupsTelemetry.poll_all_tracked() + self.assertEqual(len(metrics), 3) + + monitor_handler.poll_telemetry_metrics() + self.assertEqual(3, patch_add_metric.call_count) + + for call_arg in patch_add_metric.call_args_list: + self.assertIn(call_arg[0][0], ["Process", "Memory"]) + if call_arg[0][0] == "Process": + self.assertEqual(call_arg[0][1], "% Processor Time") + if call_arg[0][0] == "Memory": + self.assertIn(call_arg[0][1], ["Total Memory Usage", "Max Memory Usage"]) + self.assertIsInstance(call_arg[0][3], float) + + self.assertEqual(call_arg[0][2], extn_name) + self.assertFalse(call_arg[0][4]) + + monitor_handler.send_telemetry_metrics() + self.assertEqual(3, patch_add_event.call_count) # 1 for launch command, 1 for extension metrics data + # and 1 for Cgroups initialization + name = patch_add_event.call_args[0][0] + fields = patch_add_event.call_args[1] + + self.assertEqual(name, "WALinuxAgent") + self.assertEqual(fields["op"], "ExtensionMetricsData") + self.assertEqual(fields["is_success"], True) + self.assertEqual(fields["log_event"], False) + self.assertEqual(fields["is_internal"], False) + self.assertIsInstance(fields["message"], ustr) + monitor_handler.stop() + finally: + CGroupConfigurator._instance = cgroup_configurator_instance + + @skip_if_predicate_true(lambda: True, "Skipping this test currently: We need two different tests - one for " + "FileSystemCgroupAPI based test and one for SystemDCgroupAPI based test. @vrdmr will " + "be splitting this test in subsequent PRs") + @skip_if_predicate_false(are_cgroups_enabled, "Does not run when Cgroups are not enabled") + @skip_if_predicate_true(is_trusty_in_travis, "Does not run on Trusty in Travis") + @patch("azurelinuxagent.common.cgroupconfigurator.get_osutil", return_value=DefaultOSUtil()) + @patch("azurelinuxagent.common.cgroupapi.CGroupsApi._is_systemd", return_value=False) + @patch('azurelinuxagent.common.protocol.wire.WireClient.report_event') + @attr('requires_sudo') + def test_report_event_metrics_sent_for_actual_cgroup(self, patch_report_event, patch__is_systemd, patch_get_osutil, + http_get, patch_get_protocol, *args): + self.assertTrue(i_am_root(), "Test does not run when non-root") + CGroupConfigurator._instance = None - cgs = make_new_cgroup(name) + max_num_polls = 5 + time_to_wait = 1 + extn_name = "foobar-1.0.0" + cgs = make_new_cgroup(extn_name) self.assertEqual(len(cgs), 2) - for cgroup in cgs: - CGroupsTelemetry.track_cgroup(cgroup) - - for i in range(num_polls): - CGroupsTelemetry.poll_all_tracked() - consume_cpu_time() # Eat some CPU - consume_memory() + ext_handler_properties = ExtHandlerProperties() + ext_handler_properties.version = "1.0.0" + ext_handler = ExtHandler(name='foobar') + ext_handler.properties = ext_handler_properties + ext_handler_instance = ExtHandlerInstance(ext_handler=ext_handler, protocol=None) + ext_handler_instance.set_operation("Enable") monitor_handler = get_monitor_handler() monitor_handler.init_protocols() monitor_handler.last_cgroup_polling_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) monitor_handler.last_cgroup_report_telemetry = datetime.datetime.utcnow() - timedelta(hours=1) + + command = self.create_script("keep_cpu_busy_and_consume_memory_for_{0}_seconds".format(time_to_wait), ''' +nohup python -c "import time + +for i in range(3): + x = [1, 2, 3, 4, 5] * (i * 1000) + time.sleep({0}) + x *= 0 + print('Test loop')" & +'''.format(time_to_wait)) + + self.log_dir = os.path.join(self.tmp_dir, "log") + + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_base_dir", lambda *_: self.tmp_dir) as \ + patch_get_base_dir: + with patch("azurelinuxagent.ga.exthandlers.ExtHandlerInstance.get_log_dir", lambda *_: self.log_dir) as \ + patch_get_log_dir: + ext_handler_instance.launch_command(command) + + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "cpu", "walinuxagent.extensions", "foobar_1.0.0"))) + self.assertTrue(CGroupsTelemetry.is_tracked(os.path.join( + BASE_CGROUPS, "memory", "walinuxagent.extensions", "foobar_1.0.0"))) + + for i in range(max_num_polls): + metrics = CGroupsTelemetry.poll_all_tracked() + self.assertEqual(len(metrics), 3) + monitor_handler.poll_telemetry_metrics() monitor_handler.send_telemetry_metrics() - self.assertEqual(1, patch_add_event.call_count) - - name = patch_add_event.call_args[0][0] - fields = patch_add_event.call_args[1] + monitor_handler.collect_and_send_events() - self.assertEqual(name, "WALinuxAgent") - self.assertEqual(fields["op"], "ExtensionMetricsData") - self.assertEqual(fields["is_success"], True) - self.assertEqual(fields["log_event"], False) - self.assertEqual(fields["is_internal"], False) - self.assertIsInstance(fields["message"], ustr) + telemetry_event_list = patch_report_event.call_args_list[0][0][0] + + for e in telemetry_event_list.events: + details_of_event = [x for x in e.parameters if x.name in + ["Category", "Counter", "Instance", "Value"]] + + for i in details_of_event: + if i.name == "Category": + self.assertIn(i.value, ["Memory", "Process"]) + if i.name == "Counter": + self.assertIn(i.value, ["Max Memory Usage", "Total Memory Usage", "% Processor Time"]) + if i.name == "Instance": + self.assertEqual(i.value, extn_name) + if i.name == "Value": + self.assertTrue(isinstance(i.value, int) or isinstance(i.value, float)) monitor_handler.stop()