Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add query for vmSettings #2293

Merged
merged 1 commit into from
Jul 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions azurelinuxagent/common/protocol/hostplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
HOST_PLUGIN_PORT = 32526

URI_FORMAT_GET_API_VERSIONS = "http://{0}:{1}/versions"
URI_FORMAT_VM_SETTINGS = "http://{0}:{1}/vmSettings"
URI_FORMAT_GET_EXTENSION_ARTIFACT = "http://{0}:{1}/extensionArtifact"
URI_FORMAT_PUT_VM_STATUS = "http://{0}:{1}/status"
URI_FORMAT_PUT_LOG = "http://{0}:{1}/vmAgentLog"
Expand Down Expand Up @@ -140,6 +141,21 @@ def get_api_versions(self):

return return_val

def get_vm_settings_request(self):
if not self.ensure_initialized():
raise ProtocolError("HostGAPlugin: Host plugin channel is not available")

url = URI_FORMAT_VM_SETTINGS.format(self.endpoint, HOST_PLUGIN_PORT)

headers = {
_HEADER_VERSION: API_VERSION,
_HEADER_CONTAINER_ID: self.container_id,
_HEADER_HOST_CONFIG_NAME: self.role_config_name,
_HEADER_CORRELATION_ID: str(uuid.uuid4()) # TODO: persist correlation ID
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should trace the correlationId to help with debugging.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, that's the "TODO" part

}

return url, headers

def get_artifact_request(self, artifact_url, artifact_manifest_url=None):
if not self.ensure_initialized():
raise ProtocolError("HostGAPlugin: Host plugin channel is not available")
Expand Down
38 changes: 11 additions & 27 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ def detect(self):
logger.info('Initializing goal state during protocol detection')
self.client.update_goal_state(forced=True)

def update_extension_goal_state(self):
self.client.update_extension_goal_state()

def update_goal_state(self):
self.client.update_goal_state()

def try_update_goal_state(self):
return self.client.try_update_goal_state()

def update_host_plugin_from_goal_state(self):
self.client.update_host_plugin_from_goal_state()

Expand Down Expand Up @@ -572,7 +572,7 @@ def __init__(self, endpoint):
logger.info("Wire server endpoint:{0}", endpoint)
self._endpoint = endpoint
self._goal_state = None
self._last_try_update_goal_state_failed = False
self._extensions_goal_state = None
self._host_plugin = None
self.status_blob = StatusBlob(self)
self.goal_state_flusher = StateFlusher(conf.get_lib_dir())
Expand Down Expand Up @@ -788,29 +788,13 @@ def update_goal_state(self, forced=False):
except Exception as exception:
raise ProtocolError("Error processing goal state: {0}".format(ustr(exception)))

def try_update_goal_state(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was moved to UpdateHandler

"""
Attempts to update the goal state and returns True on success or False on failure, sending telemetry events about the failures.
"""
def update_extension_goal_state(self):
try:
self.update_goal_state()

if self._last_try_update_goal_state_failed:
self._last_try_update_goal_state_failed = False
message = u"Retrieving the goal state recovered from previous errors"
add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False)
logger.info(message)
except Exception as e:
if not self._last_try_update_goal_state_failed:
self._last_try_update_goal_state_failed = True
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(e))
add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=False, message=message, log_event=False)
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(traceback.format_exc()))
logger.warn(message)
message = u"Attempts to retrieve the goal state are failing: {0}".format(ustr(e))
logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(message))
return False
return True
url, headers = self.get_host_plugin().get_vm_settings_request()
vm_settings = self.fetch(url, headers)
self._extensions_goal_state = vm_settings
except Exception as exception:
raise ProtocolError("Error processing extension goal state: {0}".format(ustr(exception)))

def _update_host_plugin(self, container_id, role_config_name):
if self._host_plugin is not None:
Expand Down Expand Up @@ -1043,7 +1027,7 @@ def upload_status_blob(self):

if ext_conf.status_upload_blob is None:
# the status upload blob is in ExtensionsConfig so force a full goal state refresh
self.update_goal_state(forced=True)
self.update_goal_state(forced=True) # FT: This will come from the ExtensionsGoalState
ext_conf = self.get_ext_conf()

if ext_conf.status_upload_blob is None:
Expand Down
3 changes: 0 additions & 3 deletions azurelinuxagent/daemon/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.osutil import get_osutil
from azurelinuxagent.common.protocol.util import get_protocol_util
from azurelinuxagent.common.protocol.wire import WireProtocol
from azurelinuxagent.common.rdma import setup_rdma_device
from azurelinuxagent.common.version import AGENT_NAME, AGENT_LONG_NAME, \
AGENT_VERSION, \
Expand Down Expand Up @@ -160,8 +159,6 @@ def daemon(self, child_args=None):
# incarnation number. A forced update ensures the most
# current values.
protocol = self.protocol_util.get_protocol()
if type(protocol) is not WireProtocol:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There used to be 2 protocols: WireProtocol for Azure and MetadataProtocol for Azure Stack. The Metadata protocol was removed some time ago, so this check is no longer needed,

raise Exception("Attempt to setup RDMA without Wireserver")

protocol.client.update_goal_state(forced=True)

Expand Down
28 changes: 27 additions & 1 deletion azurelinuxagent/ga/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def __init__(self):
self._heartbeat_id = str(uuid.uuid4()).upper()
self._heartbeat_counter = 0
self._heartbeat_update_goal_state_error_count = 0
self._last_try_update_goal_state_failed = False

self.last_incarnation = None

Expand Down Expand Up @@ -352,8 +353,33 @@ def _check_threads_running(self, all_thread_handlers):
logger.warn("{0} thread died, restarting".format(thread_handler.get_thread_name()))
thread_handler.start()

def _try_update_goal_state(self, protocol):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move from WireClient, but adds a call to protocol.update_extension_goal_state()

"""
Attempts to update the goal state and returns True on success or False on failure, sending telemetry events about the failures.
"""
try:
protocol.update_goal_state()
protocol.update_extension_goal_state()

if self._last_try_update_goal_state_failed:
self._last_try_update_goal_state_failed = False
message = u"Retrieving the goal state recovered from previous errors"
add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=True, message=message, log_event=False)
logger.info(message)
except Exception as e:
if not self._last_try_update_goal_state_failed:
self._last_try_update_goal_state_failed = True
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(e))
add_event(AGENT_NAME, op=WALAEventOperation.FetchGoalState, version=CURRENT_VERSION, is_success=False, message=message, log_event=False)
message = u"An error occurred while retrieving the goal state: {0}".format(ustr(traceback.format_exc()))
logger.warn(message)
message = u"Attempts to retrieve the goal state are failing: {0}".format(ustr(e))
logger.periodic_warn(logger.EVERY_SIX_HOURS, "[PERIODIC] {0}".format(message))
return False
return True

def _process_goal_state(self, protocol, exthandlers_handler, remote_access_handler):
if not protocol.try_update_goal_state():
if not self._try_update_goal_state(protocol):
self._heartbeat_update_goal_state_error_count += 1
return

Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/pa/provision/cloudinit.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def run(self):
utc_start = datetime.utcnow()
logger.info("Running CloudInit provisioning handler")
self.wait_for_ovfenv()
self.protocol_util.get_protocol()
self.protocol_util.get_protocol() # Trigger protocol detection
self.report_not_ready("Provisioning", "Starting")

thumbprint = self.wait_for_ssh_host_key() # pylint: disable=W0612
Expand Down
2 changes: 1 addition & 1 deletion azurelinuxagent/pa/provision/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def run(self):
logger.info("Copying ovf-env.xml")
ovf_env = self.protocol_util.copy_ovf_env()

self.protocol_util.get_protocol()
self.protocol_util.get_protocol() # Trigger protocol detection
self.report_not_ready("Provisioning", "Starting")
logger.info("Starting provisioning")

Expand Down
58 changes: 58 additions & 0 deletions tests/data/hostgaplugin/vm_settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"activityId": "785cb703-7468-4015-b5b8-3eb060b25a81",
"correlationId": "38e83195-ad71-4194-91d0-def8af9c4405",
"extensionsLastModifiedTickCount": 637607673150222321,
"inVMMetadata": {
"subscriptionId": "a53f7094-a16c-47af-abe4-b05c05d0d79a",
"resourceGroupName": "narrieta-rg",
"vmName": "nam-canary",
"location": "EastUS2EUAP",
"vmId": "fc9e847a-29fd-4ca8-a5ae-59e581425ca7",
"vmSize": "Standard_DS1_v2",
"osType": "Linux",
"vmImage": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "18.04-LTS",
"version": "18.04.202104150"
}
},
"extensionGoalStates": [
{
"name": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
"version": "1.9.1",
"location": "https://zrdfepirv2cbn09pr02a.blob.core.windows.net/a47f0806d764480a8d989d009c75007d/Microsoft.Azure.Monitor_AzureMonitorLinuxAgent_useast2euap_manifest.xml",
"state": "enabled",
"autoUpgrade": true,
"runAsStartupTask": false,
"isJson": true,
"useExactVersion": true,
"settingsSeqNo": 0,
"settings": [
{
"protectedSettingsCertThumbprint": "C6E8BB9560A41FDF7B90894738AE82AFA3440EE8",
"protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEADPJwniDeIUXzxNrZCloitFdscQ59Bz1dj9DLBREAiM8jmxM0LLicTJDUv272Qm/4ZQgdqpFYBFjGab/9MX+Ih2x47FkVY1woBkckMaC/QOFv84gbboeQCmJYZC/rZJdh8rCMS+CEPq3uH1PVrvtSdZ9uxnaJ+E4exTPPviIiLIPtqWafNlzdbBt8HZjYaVw+SSe+CGzD2pAQeNttq3Rt/6NjCzrjG8ufKwvRoqnrInMs4x6nnN5/xvobKIBSv4/726usfk8Ug+9Q6Benvfpmre2+1M5PnGTfq78cO3o6mI3cPoBUjp5M0iJjAMGeMt81tyHkimZrEZm6pLa4NQMOEjArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECC5nVaiJaWt+gAhgeYvxUOYHXw==",
"publicSettings": "{\"GCS_AUTO_CONFIG\":true}"
}
]
},
{
"name": "Microsoft.Azure.Security.Monitoring.AzureSecurityLinuxAgent",
"version": "2.15.112",
"location": "https://zrdfepirv2cbn09pr02a.blob.core.windows.net/4ef06ad957494df49c807a5334f2b5d2/Microsoft.Azure.Security.Monitoring_AzureSecurityLinuxAgent_useast2euap_manifest.xml",
"state": "enabled",
"autoUpgrade": true,
"runAsStartupTask": false,
"isJson": true,
"useExactVersion": true,
"settingsSeqNo": 0,
"settings": [
{
"protectedSettingsCertThumbprint": "C6E8BB9560A41FDF7B90894738AE82AFA3440EE8",
"protectedSettings": "MIIBsAYJKoZIhvcNAQcDoIIBoTCCAZ0CAQAxggFpMIIBZQIBADBNMDkxNzA1BgoJkiaJk/IsZAEZFidXaW5kb3dzIEF6dXJlIENSUCBDZXJ0aWZpY2F0ZSBHZW5lcmF0b3ICEFpB/HKM/7evRk+DBz754wUwDQYJKoZIhvcNAQEBBQAEggEAqBkgNZ6L81eyQ3UmHqdWTmq4Ya4r2JCjJbHnQiKeRDd+qUw0WZRKFiQ2ki0QjZeNoVAcjL9WqmrcsU83hb7j5Y/uqIcgpBHCjJ4h7dr5ZyrioONhwtcAPH7fWW1lSi37RYxJ2JrimoGdCLJWEfiN4DjLvfHbNMLkTpUck7SrJFPvnNwxJVN337GhY9rpjaL314M9eAri0f3GLN6DFheBJe++AYH6LXqVvn6WU7DOoFU9T7Fz4oBRzUAxCwPqCDbHxmjJN7zH5Bd0Y2KAt1A2J2o2Dp8lrorVu/XXpahUTz9JcBnZbsnmJh9zjo4cAp1mlxrkrcbFndpwStqcYOudoDArBgkqhkiG9w0BBwEwFAYIKoZIhvcNAwcECANcY8gd0y1AgAi90+baxPAWvg==",
"publicSettings": "{\"enableGenevaUpload\":true}"
}
]
}
]
}
122 changes: 121 additions & 1 deletion tests/ga/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import glob
import json
import os
import re
import shutil
import stat
import subprocess
Expand All @@ -22,7 +23,7 @@

from azurelinuxagent.common import conf
from azurelinuxagent.common.event import EVENTS_DIRECTORY
from azurelinuxagent.common.exception import ProtocolError, UpdateError, ResourceGoneError
from azurelinuxagent.common.exception import ProtocolError, UpdateError, ResourceGoneError, HttpError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.persist_firewall_rules import PersistFirewallRulesHandler
from azurelinuxagent.common.protocol.goal_state import ExtensionsConfig
Expand All @@ -45,6 +46,8 @@
from tests.protocol.mockwiredata import DATA_FILE, DATA_FILE_MULTIPLE_EXT
from tests.tools import AgentTestCase, data_dir, DEFAULT, patch, load_bin_data, load_data, Mock, MagicMock, \
clear_singleton_instances, mock_sleep
from tests.protocol import mockwiredata
from tests.protocol.mocks import HttpRequestPredicates

NO_ERROR = {
"last_failure": 0.0,
Expand Down Expand Up @@ -1892,6 +1895,123 @@ def time(self):
return current_time


class TryUpdateGoalStateTestCase(HttpRequestPredicates, AgentTestCase):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved from test_wire.py, but adds test_it_should_update_the_goal_state

"""
Tests for UpdateHandler._try_update_goal_state()
"""
def test_it_should_return_true_on_success(self):
update_handler = get_update_handler()
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol:
self.assertTrue(update_handler._try_update_goal_state(protocol), "try_update_goal_state should have succeeded")

def test_it_should_return_false_on_failure(self):
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol:
def http_get_handler(url, *_, **__):
if self.is_goal_state_request(url):
return HttpError('Exception to fake an error retrieving the goal state')
return None
protocol.set_http_handlers(http_get_handler=http_get_handler)

update_handler = get_update_handler()
self.assertFalse(update_handler._try_update_goal_state(protocol), "try_update_goal_state should have failed")

def test_it_should_update_the_goal_state(self):
update_handler = get_update_handler()
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol:
protocol.mock_wire_data.set_incarnation(12345)

update_handler._try_update_goal_state(protocol)

self.assertEqual(protocol.get_incarnation(), '12345', "The goal state was not updated (received unexpected incarnation)")
self.assertIsNotNone(protocol.client._extensions_goal_state, "The extensions goal state was not updated") # this is just a dummy test for now
kevinclark19a marked this conversation as resolved.
Show resolved Hide resolved

def test_it_should_log_errors_only_when_the_error_state_changes(self):
with mock_wire_protocol(mockwiredata.DATA_FILE) as protocol:
def http_get_handler(url, *_, **__):
if self.is_goal_state_request(url):
if fail_goal_state_request:
return HttpError('Exception to fake an error retrieving the goal state')
return None

protocol.set_http_handlers(http_get_handler=http_get_handler)

@contextlib.contextmanager
def create_log_and_telemetry_mocks():
with patch("azurelinuxagent.ga.update.logger", autospec=True) as logger_patcher:
with patch("azurelinuxagent.ga.update.add_event") as add_event_patcher:
yield logger_patcher, add_event_patcher

calls_to_strings = lambda calls: (str(c) for c in calls)
filter_calls = lambda calls, regex=None: (c for c in calls_to_strings(calls) if regex is None or re.match(regex, c))
logger_calls = lambda regex=None: [m for m in filter_calls(logger.method_calls, regex)] # pylint: disable=used-before-assignment,unnecessary-comprehension
warnings = lambda: logger_calls(r'call.warn\(.*An error occurred while retrieving the goal state.*')
periodic_warnings = lambda: logger_calls(r'call.periodic_warn\(.*Attempts to retrieve the goal state are failing.*')
success_messages = lambda: logger_calls(r'call.info\(.*Retrieving the goal state recovered from previous errors.*')
telemetry_calls = lambda regex=None: [m for m in filter_calls(add_event.mock_calls, regex)] # pylint: disable=used-before-assignment,unnecessary-comprehension
goal_state_events = lambda: telemetry_calls(r".*op='FetchGoalState'.*")

#
# Initially calls to retrieve the goal state are successful...
#
update_handler = get_update_handler()
fail_goal_state_request = False
with create_log_and_telemetry_mocks() as (logger, add_event):
update_handler._try_update_goal_state(protocol)

lc = logger_calls()
self.assertTrue(len(lc) == 0, "A successful call should not produce any log messages: [{0}]".format(lc))

tc = telemetry_calls()
self.assertTrue(len(tc) == 0, "A successful call should not produce any telemetry events: [{0}]".format(tc))

#
# ... then an error happens...
#
fail_goal_state_request = True
with create_log_and_telemetry_mocks() as (logger, add_event):
update_handler._try_update_goal_state(protocol)

w = warnings()
pw = periodic_warnings()
self.assertEqual(1, len(w), "A failure should have produced a warning: [{0}]".format(w))
self.assertEqual(1, len(pw), "A failure should have produced a periodic warning: [{0}]".format(pw))

gs = goal_state_events()
self.assertTrue(len(gs) == 1 and 'is_success=False' in gs[0], "A failure should produce a telemetry event (success=false): [{0}]".format(gs))

#
# ... and errors continue happening...
#
with create_log_and_telemetry_mocks() as (logger, add_event):
update_handler._try_update_goal_state(protocol)
update_handler._try_update_goal_state(protocol)
update_handler._try_update_goal_state(protocol)

w = warnings()
pw = periodic_warnings()
self.assertTrue(len(w) == 0, "Subsequent failures should not produce warnings: [{0}]".format(w))
self.assertEqual(len(pw), 3, "Subsequent failures should produce periodic warnings: [{0}]".format(pw))

tc = telemetry_calls()
self.assertTrue(len(tc) == 0, "Subsequent failures should not produce any telemetry events: [{0}]".format(tc))

#
# ... until we finally succeed
#
fail_goal_state_request = False
with create_log_and_telemetry_mocks() as (logger, add_event):
update_handler._try_update_goal_state(protocol)

s = success_messages()
w = warnings()
pw = periodic_warnings()
self.assertEqual(len(s), 1, "Recovering after failures should have produced an info message: [{0}]".format(s))
self.assertTrue(len(w) == 0 and len(pw) == 0, "Recovering after failures should have not produced any warnings: [{0}] [{1}]".format(w, pw))

gs = goal_state_events()
self.assertTrue(len(gs) == 1 and 'is_success=True' in gs[0], "Recovering after failures should produce a telemetry event (success=true): [{0}]".format(gs))


class TestProcessGoalState(AgentTestCase):
"""
Tests for UpdateHandler._process_goal_state
Expand Down
Loading