Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API for uploading logs via host plugin #1902

Merged
merged 10 commits into from
Aug 4, 2020
57 changes: 52 additions & 5 deletions azurelinuxagent/common/protocol/hostplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,39 @@
import base64
import datetime
import json
import uuid
pgombar marked this conversation as resolved.
Show resolved Hide resolved

from azurelinuxagent.common import logger
from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE
from azurelinuxagent.common.event import WALAEventOperation, report_event
pgombar marked this conversation as resolved.
Show resolved Hide resolved
from azurelinuxagent.common.exception import HttpError, ProtocolError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.healthservice import HealthService
from azurelinuxagent.common.utils import restutil
from azurelinuxagent.common.utils import textutil
from azurelinuxagent.common.utils.textutil import remove_bom
from azurelinuxagent.common.version import PY_VERSION_MAJOR
from azurelinuxagent.common.version import AGENT_NAME, AGENT_VERSION, PY_VERSION_MAJOR

HOST_PLUGIN_PORT = 32526

URI_FORMAT_GET_API_VERSIONS = "http://{0}:{1}/versions"
URI_FORMAT_GET_EXTENSION_ARTIFACT = "http://{0}:{1}/extensionArtifact"
URI_FORMAT_PUT_VM_STATUS = "http://{0}:{1}/status"
URI_FORMAT_PUT_LOG = "http://{0}:{1}/vmAgentLog"
URI_FORMAT_HEALTH = "http://{0}:{1}/health"

API_VERSION = "2015-09-01"

HEADER_CLIENT_NAME = "x-ms-client-name"
pgombar marked this conversation as resolved.
Show resolved Hide resolved
HEADER_CLIENT_VERSION = "x-ms-client-version"
HEADER_CORRELATION_ID = "x-ms-client-correlationid"
HEADER_CONTAINER_ID = "x-ms-containerid"
HEADER_DEPLOYMENT_ID = "x-ms-vmagentlog-deploymentid"
HEADER_VERSION = "x-ms-version"
HEADER_HOST_CONFIG_NAME = "x-ms-host-config-name"
HEADER_ARTIFACT_LOCATION = "x-ms-artifact-location"
HEADER_ARTIFACT_MANIFEST_LOCATION = "x-ms-artifact-manifest-location"

MAXIMUM_PAGEBLOB_PAGE_SIZE = 4 * 1024 * 1024 # Max page size: 4MB


Expand All @@ -60,7 +70,7 @@ def __init__(self, endpoint, container_id, role_config_name):
self.api_versions = None
self.endpoint = endpoint
self.container_id = container_id
self.deployment_id = None
self.deployment_id = self._extract_deployment_id(role_config_name)
larohra marked this conversation as resolved.
Show resolved Hide resolved
self.role_config_name = role_config_name
self.manifest_uri = None
self.health_service = HealthService(endpoint)
Expand All @@ -69,6 +79,11 @@ def __init__(self, endpoint, container_id, role_config_name):
self.fetch_last_timestamp = None
self.status_last_timestamp = None

@staticmethod
def _extract_deployment_id(role_config_name):
    """
    Return the deployment id portion of a role config name.

    Role config names have the form <deployment id>.<incarnation>(...), so the
    deployment id is the text that precedes the first '.'.

    :param role_config_name: the role config name, or None
    :return: the text before the first '.', or None when no name was given
    """
    if role_config_name is None:
        return None
    deployment_id, _, _ = role_config_name.partition(".")
    return deployment_id

@staticmethod
def is_default_channel():
return HostPluginProtocol._is_default_channel
Expand All @@ -82,6 +97,7 @@ def update_container_id(self, new_container_id):

def update_role_config_name(self, new_role_config_name):
    """
    Store a new role config name and refresh the deployment id derived from it.

    :param new_role_config_name: the role config name to record
    """
    self.role_config_name = new_role_config_name
    # The deployment id is computed from the role config name, so both are updated together.
    derived_deployment_id = self._extract_deployment_id(new_role_config_name)
    self.deployment_id = derived_deployment_id

def update_manifest_uri(self, new_manifest_uri):
self.manifest_uri = new_manifest_uri
Expand All @@ -91,7 +107,6 @@ def ensure_initialized(self):
self.api_versions = self.get_api_versions()
self.is_available = API_VERSION in self.api_versions
self.is_initialized = self.is_available
from azurelinuxagent.common.event import WALAEventOperation, report_event
report_event(WALAEventOperation.InitializeHostPlugin,
is_success=self.is_available)
return self.is_available
Expand Down Expand Up @@ -202,7 +217,29 @@ def should_report(is_healthy, error_state, last_timestamp, period):
return datetime.datetime.utcnow() >= (last_timestamp + period)

def put_vm_log(self, content):
    """
    Try to upload VM logs, a compressed zip file, via the host plugin /vmAgentLog channel.

    NOTE(review): the whole archive is passed to restutil.http_put as a single in-memory
    payload. According to the review discussion on this change, the agent limits the
    uncompressed archive to 150 MB (roughly 13-15 MB once compressed) and the host accepts
    at most 100 MB -- confirm these limits against the log collector before relying on them.

    :param content: the binary content of the zip file to upload
    :raises ProtocolError: if the host plugin is not available, or if the content, the
        container id or the deployment id is missing
    :raises HttpError: if the host plugin reports that the upload request failed
    """
    if not self.ensure_initialized():
        raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

    if content is None or self.container_id is None or self.deployment_id is None:
        # Report only whether content was supplied: it is a binary archive that can be
        # several megabytes long and must not be embedded in an exception message.
        raise ProtocolError("HostGAPlugin: Invalid arguments passed to upload VM logs. "
                            "Content provided: {0}, container id: {1}, deployment id: {2}".format(
                                content is not None,
                                self.container_id,
                                self.deployment_id))

    url = URI_FORMAT_PUT_LOG.format(self.endpoint, HOST_PLUGIN_PORT)
    response = restutil.http_put(url,
                                 data=content,
                                 headers=self._build_log_headers())

    if restutil.request_failed(response):
        error_response = restutil.read_response_error(response)
        raise HttpError("HostGAPlugin: Upload VM logs failed: {0}".format(error_response))

    logger.info("HostGAPlugin: Upload VM logs succeeded")

def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
"""
Expand Down Expand Up @@ -327,7 +364,17 @@ def _build_status_headers(self):
HEADER_CONTAINER_ID: self.container_id,
HEADER_HOST_CONFIG_NAME: self.role_config_name
}


def _build_log_headers(self):
    """
    Build the HTTP headers sent with a log upload request.

    The headers carry the API version, this instance's container id and deployment id,
    the agent name and version, and a correlation id.

    :return: a dict mapping header names to header values
    """
    headers = {}
    headers[HEADER_VERSION] = API_VERSION
    headers[HEADER_CONTAINER_ID] = self.container_id
    headers[HEADER_DEPLOYMENT_ID] = self.deployment_id
    headers[HEADER_CLIENT_NAME] = AGENT_NAME
    headers[HEADER_CLIENT_VERSION] = AGENT_VERSION
    # A new random correlation id is generated on every call.
    headers[HEADER_CORRELATION_ID] = str(uuid.uuid4())
    return headers

def _base64_encode(self, data):
s = base64.b64encode(bytes(data))
if PY_VERSION_MAJOR > 2:
Expand Down
70 changes: 65 additions & 5 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
import azurelinuxagent.common.logger as logger
import azurelinuxagent.common.utils.textutil as textutil
from azurelinuxagent.common.datacontract import validate_param
from azurelinuxagent.common.event import add_event, add_periodic, WALAEventOperation, EVENTS_DIRECTORY
from azurelinuxagent.common.event import add_event, add_periodic, EVENTS_DIRECTORY
from azurelinuxagent.common.event import report_event, WALAEventOperation
from azurelinuxagent.common.exception import ProtocolNotFoundError, \
ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError
from azurelinuxagent.common.future import httpclient, bytebuffer, ustr
Expand Down Expand Up @@ -192,6 +193,9 @@ def report_event(self, events):
validate_param(EVENTS_DIRECTORY, events, TelemetryEventList)
self.client.report_event(events)

def upload_logs(self, logs):
    """
    Upload logs by delegating to the wire client.

    :param logs: passed through unchanged to the client's upload_logs
    """
    wire_client = self.client
    wire_client.upload_logs(logs)


def _build_role_properties(container_id, role_instance_id, thumbprint):
xml = (u"<?xml version=\"1.0\" encoding=\"utf-8\"?>"
Expand Down Expand Up @@ -1116,9 +1120,6 @@ def report_event(self, event_list):
self.send_event(provider_id, buf[provider_id])

def report_status_event(self, message, is_success):
from azurelinuxagent.common.event import report_event, \
WALAEventOperation

report_event(op=WALAEventOperation.ReportStatus,
is_success=is_success,
message=message,
Expand Down Expand Up @@ -1195,14 +1196,73 @@ def get_artifacts_profile(self):
msg = "Content: [{0}]".format(profile)
logger.verbose(msg)

from azurelinuxagent.common.event import report_event, WALAEventOperation
report_event(op=WALAEventOperation.ArtifactsProfileBlob,
is_success=False,
message=msg,
log_event=False)

return artifacts_profile

def upload_logs(self, content):
    """
    Upload the VM log archive through the host plugin channel.

    If the first attempt fails with ResourceGoneError or InvalidContainerError, the host
    plugin is reconfigured from a fresh goal state and the upload is retried once. Each
    successful upload, and a failed retry, is reported as a WALAEventOperation.HostPlugin
    event.

    :param content: the log archive to upload; passed unchanged to the host plugin's put_vm_log
    :raises ResourceGoneError: if the retry after the goal state refresh fails with this error
    :raises InvalidContainerError: if the retry after the goal state refresh fails with this error
    """
    def report(is_success, message):
        # Every outcome is reported through the same HostPlugin telemetry event.
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.HostPlugin,
                  is_success=is_success,
                  message=message,
                  log_event=True)

    try:
        host_plugin = self.get_host_plugin()
        host_plugin.put_vm_log(content)

        msg = "Upload VM logs request succeeded using the host plugin channel for " \
              "container id {0} and role config file {1}".format(host_plugin.container_id,
                                                                 host_plugin.role_config_name)
        report(True, msg)
    except (ResourceGoneError, InvalidContainerError) as first_error:
        host_plugin = self.get_host_plugin()
        old_container_id = host_plugin.container_id
        old_role_config_name = host_plugin.role_config_name

        msg = "Upload VM logs request failed with the current host plugin configuration. " \
              "ContainerId: {0}, role config file: {1}. Fetching new goal state and retrying the call. " \
              "Error: {2}".format(old_container_id, old_role_config_name, ustr(first_error))
        logger.info(msg)

        self.update_host_plugin_from_goal_state()

        # Re-fetch the plugin after the refresh so that the ids logged as "new" and the
        # retry below both come from the same, current, host plugin instance.
        host_plugin = self.get_host_plugin()
        new_container_id = host_plugin.container_id
        new_role_config_name = host_plugin.role_config_name
        msg = "Host plugin reconfigured with new parameters. " \
              "ContainerId: {0}, role config file: {1}.".format(new_container_id, new_role_config_name)
        logger.info(msg)

        try:
            host_plugin.put_vm_log(content)

            msg = "Upload VM logs request succeeded using the host plugin channel after goal state refresh. " \
                  "ContainerId changed from {0} to {1}, " \
                  "role config file changed from {2} to {3}.".format(old_container_id, new_container_id,
                                                                     old_role_config_name, new_role_config_name)
            report(True, msg)
        except (ResourceGoneError, InvalidContainerError) as retry_error:
            msg = "Upload VM logs request failed using the host plugin channel after goal state refresh. " \
                  "ContainerId changed from {0} to {1}, role config file changed from {2} to {3}. " \
                  "Exception type: {4}.".format(old_container_id, new_container_id, old_role_config_name,
                                                new_role_config_name, type(retry_error).__name__)
            report(False, msg)
            # Propagate the retry failure to the caller after reporting it.
            raise


class VersionInfo(object):
def __init__(self, xml_text):
Expand Down
16 changes: 9 additions & 7 deletions tests/common/test_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,21 @@
import re
import shutil
import threading
import xml.dom
from datetime import datetime, timedelta

import xml.dom

import azurelinuxagent.common.utils.textutil as textutil
from azurelinuxagent.common import event, logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.event import add_event, add_periodic, add_log_event, elapsed_milliseconds, report_metric, \
WALAEventOperation, parse_xml_event, parse_json_event, AGENT_EVENT_FILE_EXTENSION, EVENTS_DIRECTORY
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.version import CURRENT_AGENT, CURRENT_VERSION, AGENT_EXECUTION_MODE
from tests.protocol import mockwiredata
from tests.protocol.mocks import mock_wire_protocol, HttpRequestPredicates, MockHttpResponse
from azurelinuxagent.common.version import CURRENT_AGENT, CURRENT_VERSION, AGENT_EXECUTION_MODE
from azurelinuxagent.common.osutil import get_osutil
from tests.tools import AgentTestCase, data_dir, load_data, Mock, patch, skip_if_predicate_true
from tests.tools import AgentTestCase, data_dir, load_data, patch, skip_if_predicate_true
from tests.utils.event_logger_tools import EventLoggerTools


Expand All @@ -48,8 +50,8 @@ def setUp(self):
osutil = get_osutil()

self.expected_common_parameters = {
# common parameters computed at event creation; the timestamp (stored as the opcode name) is not included here and
# is checked separately from these parameters
# common parameters computed at event creation; the timestamp (stored as the opcode name) is not included
# here and is checked separately from these parameters
'GAVersion': CURRENT_AGENT,
'ContainerId': AgentGlobals.get_container_id(),
'EventTid': threading.current_thread().ident,
Expand All @@ -66,7 +68,7 @@ def setUp(self):
'VMName': 'MachineRole_IN_0',
'TenantName': 'db00a7755a5e4e8a8fe4b19bc3b330c3',
'RoleName': 'MachineRole',
'RoleInstanceName': 'MachineRole_IN_0',
'RoleInstanceName': 'b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0',
# common parameters
'Location': EventLoggerTools.mock_imds_data['location'],
'SubscriptionId': EventLoggerTools.mock_imds_data['subscriptionId'],
Expand Down
4 changes: 2 additions & 2 deletions tests/data/wire/goal_state.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
<ContainerId>c6d5526c-5ac2-4200-b6e2-56f2b70c5ab2</ContainerId>
<RoleInstanceList>
<RoleInstance>
<InstanceId>MachineRole_IN_0</InstanceId>
<InstanceId>b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0</InstanceId>
larohra marked this conversation as resolved.
Show resolved Hide resolved
<State>Started</State>
<Configuration>
<HostingEnvironmentConfig>http://168.63.129.16:80/hostingenvuri/</HostingEnvironmentConfig>
<SharedConfig>http://168.63.129.16:80/sharedconfiguri/</SharedConfig>
<Certificates>http://168.63.129.16:80/certificatesuri/</Certificates>
<ExtensionsConfig>http://168.63.129.16:80/extensionsconfiguri/</ExtensionsConfig>
<FullConfig>http://168.63.129.16:80/fullconfiguri/</FullConfig>
<ConfigName>DummyRoleConfigName.xml</ConfigName>
<ConfigName>b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml</ConfigName>
</Configuration>
</RoleInstance>
</RoleInstanceList>
Expand Down
3 changes: 2 additions & 1 deletion tests/data/wire/goal_state_no_ext.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
<ContainerId>c6d5526c-5ac2-4200-b6e2-56f2b70c5ab2</ContainerId>
<RoleInstanceList>
<RoleInstance>
<InstanceId>MachineRole_IN_0</InstanceId>
<InstanceId>b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0</InstanceId>
<State>Started</State>
<Configuration>
<HostingEnvironmentConfig>http://168.63.129.16:80/hostingenvuri/</HostingEnvironmentConfig>
<SharedConfig>http://168.63.129.16:80/sharedconfiguri/</SharedConfig>
<Certificates>http://168.63.129.16:80/certificatesuri/</Certificates>
<FullConfig>http://168.63.129.16:80/fullconfiguri/</FullConfig>
<ConfigName>b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml</ConfigName>
</Configuration>
</RoleInstance>
</RoleInstanceList>
Expand Down
4 changes: 2 additions & 2 deletions tests/data/wire/goal_state_remote_access.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
</RemoteAccessInfo>
<RoleInstanceList>
<RoleInstance>
<InstanceId>MachineRole_IN_0</InstanceId>
<InstanceId>b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0</InstanceId>
<State>Started</State>
<Configuration>
<HostingEnvironmentConfig>http://168.63.129.16:80/hostingenvuri/</HostingEnvironmentConfig>
<SharedConfig>http://168.63.129.16:80/sharedconfiguri/</SharedConfig>
<Certificates>http://168.63.129.16:80/certificatesuri/</Certificates>
<ExtensionsConfig>http://168.63.129.16:80/extensionsconfiguri/</ExtensionsConfig>
<FullConfig>http://168.63.129.16:80/fullconfiguri/</FullConfig>
<ConfigName>DummyRoleConfigName.xml</ConfigName>
<ConfigName>b61f93d0-e1ed-40b2-b067-22c243233448.1.b61f93d0-e1ed-40b2-b067-22c243233448.2.MachineRole_IN_0.xml</ConfigName>
</Configuration>
</RoleInstance>
</RoleInstanceList>
Expand Down
2 changes: 1 addition & 1 deletion tests/ga/test_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def test_collect_and_send_events(self, mock_lib_dir, patch_send_event, *_):
'<Param Name="VMName" Value="MachineRole_IN_0" T="mt:wstr" />' \
'<Param Name="TenantName" Value="db00a7755a5e4e8a8fe4b19bc3b330c3" T="mt:wstr" />' \
'<Param Name="RoleName" Value="MachineRole" T="mt:wstr" />' \
'<Param Name="RoleInstanceName" Value="MachineRole_IN_0" T="mt:wstr" />' \
'<Param Name="RoleInstanceName" Value="b61f93d0-e1ed-40b2-b067-22c243233448.MachineRole_IN_0" T="mt:wstr" />' \
'<Param Name="Location" Value="uswest" T="mt:wstr" />' \
'<Param Name="SubscriptionId" Value="AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE" T="mt:wstr" />' \
'<Param Name="ResourceGroupName" Value="test-rg" T="mt:wstr" />' \
Expand Down
7 changes: 7 additions & 0 deletions tests/protocol/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ def http_request(method, url, data, **kwargs):
return protocol.mock_wire_data.mock_http_get(url, **kwargs)
if method == 'POST':
return protocol.mock_wire_data.mock_http_post(url, data, **kwargs)
if method == 'PUT':
return protocol.mock_wire_data.mock_http_put(url, data, **kwargs)

# the request was not handled; fail or call the original restutil.http_request
if fail_on_unknown_request:
Expand Down Expand Up @@ -201,6 +203,11 @@ def is_host_plugin_in_vm_artifacts_profile_request(url, request_kwargs):
artifact_location = HttpRequestPredicates._get_host_plugin_request_artifact_location(url, request_kwargs)
return HttpRequestPredicates.is_in_vm_artifacts_profile_request(artifact_location)

@staticmethod
def is_host_plugin_put_logs_request(url):
    """
    Tell whether a URL is the host plugin's log upload endpoint.

    :param url: the request URL to check
    :return: True when the lower-cased URL equals the /vmagentlog URL built from
        restutil.KNOWN_WIRESERVER_IP and restutil.HOST_PLUGIN_PORT
    """
    # The candidate is lower-cased so the comparison ignores the URL's letter case.
    candidate = url.lower()
    expected = 'http://{0}:{1}/vmagentlog'.format(restutil.KNOWN_WIRESERVER_IP,
                                                  restutil.HOST_PLUGIN_PORT)
    return candidate == expected


class MockHttpResponse:
def __init__(self, status, body=''):
Expand Down
16 changes: 16 additions & 0 deletions tests/protocol/mockwiredata.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __init__(self, data_files=DATA_FILE):
"/versions": 0,
"/health": 0,
"/HealthService": 0,
"/vmAgentLog": 0,
"goalstate": 0,
"hostingenvuri": 0,
"sharedconfiguri": 0,
Expand Down Expand Up @@ -244,6 +245,21 @@ def mock_http_post(self, url, *args, **kwargs):
resp.read = Mock(return_value=content.encode("utf-8"))
return resp

def mock_http_put(self, url, *args, **kwargs):
    """
    Return a mock response for an HTTP PUT request issued during a test.

    Only URLs ending in '/vmAgentLog' are handled: the matching call counter is
    incremented and an OK response with an empty body is returned.

    :param url: the URL of the PUT request
    :return: a mock response whose status is httpclient.OK and whose read() returns empty bytes
    :raises Exception: if the URL is not one this mock handles
    """
    response = MagicMock()
    response.status = httpclient.OK

    # Any endpoint other than the log upload one is a test error.
    if not url.endswith('/vmAgentLog'):
        raise Exception("Bad url {0}".format(url))

    self.call_counts['/vmAgentLog'] += 1
    body = ''
    response.read = Mock(return_value=body.encode("utf-8"))
    return response

def mock_crypt_util(self, *args, **kw):
#Partially patch instance method of class CryptUtil
cryptutil = CryptUtil(*args, **kw)
Expand Down
Loading