Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/Azure/WALinuxAgent into …
Browse files Browse the repository at this point in the history
…add_mariner_dcr_v2
  • Loading branch information
Kevin committed Feb 22, 2022
2 parents 79cd555 + 899a38e commit 2b00a1d
Show file tree
Hide file tree
Showing 50 changed files with 898 additions and 606 deletions.
10 changes: 0 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ A sample configuration file is shown below:
```yml
Extensions.Enabled=y
Extensions.GoalStatePeriod=6
Extensions.GoalStateHistoryCleanupPeriod=1800
Provisioning.Agent=auto
Provisioning.DeleteRootPassword=n
Provisioning.RegenerateSshHostKeyPair=y
Expand Down Expand Up @@ -243,15 +242,6 @@ _Note_: setting up this parameter to more than a few minutes can make the state
the VM be reported as unresponsive/unavailable on the Azure portal. Also, this
setting affects how fast the agent starts executing extensions.

#### __Extensions.GoalStateHistoryCleanupPeriod__

_Type: Integer_
_Default: 1800 (30 minutes)_

How often to clean up the history folder of the agent. The agent keeps past goal
states in this folder, each goal state represented by a set of small files. The
history is useful for debugging issues in the agent or extensions.

#### __AutoUpdate.Enabled__

_Type: Boolean_
Expand Down
1 change: 1 addition & 0 deletions azurelinuxagent/common/agent_supported_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class SupportedFeatureNames(object):
"""
MultiConfig = "MultipleExtensionsPerHandler"
ExtensionTelemetryPipeline = "ExtensionTelemetryPipeline"
FastTrack = "FastTrack"


class AgentSupportedFeature(object):
Expand Down
11 changes: 3 additions & 8 deletions azurelinuxagent/common/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,14 @@ def load_conf_from_file(conf_file_path, conf=__conf__):
"ResourceDisk.MountOptions": None,
"ResourceDisk.Filesystem": "ext3",
"AutoUpdate.GAFamily": "Prod",
"Debug.CgroupMonitorExpiryTime": "2022-01-31",
"Debug.CgroupMonitorExpiryTime": "2022-03-31",
"Debug.CgroupMonitorExtensionName": "Microsoft.Azure.Monitor.AzureMonitorLinuxAgent",
}


__INTEGER_OPTIONS__ = {
"Extensions.GoalStatePeriod": 6,
"Extensions.InitialGoalStatePeriod": 6,
"Extensions.GoalStateHistoryCleanupPeriod": 1800,
"OS.EnableFirewallPeriod": 300,
"OS.RemovePersistentNetRulesPeriod": 30,
"OS.RootDeviceScsiTimeoutPeriod": 30,
Expand Down Expand Up @@ -377,10 +376,6 @@ def get_initial_goal_state_period(conf=__conf__):
return conf.get_int("Extensions.InitialGoalStatePeriod", default_value=lambda: get_goal_state_period(conf=conf))


def get_goal_state_history_cleanup_period(conf=__conf__):
# Reads the integer option "Extensions.GoalStateHistoryCleanupPeriod" from conf. 1800 is passed as the
# second argument of get_int -- presumably the value used when the option is not set (confirm against get_int).
return conf.get_int("Extensions.GoalStateHistoryCleanupPeriod", 1800)


def get_allow_reset_sys_user(conf=__conf__):
# Reads the switch option "Provisioning.AllowResetSysUser" from conf. False is passed as the second
# argument of get_switch -- presumably the value used when the option is not set (confirm against get_switch).
return conf.get_switch("Provisioning.AllowResetSysUser", False)

Expand Down Expand Up @@ -551,11 +546,11 @@ def get_agent_cpu_quota(conf=__conf__):

def get_cgroup_monitor_expiry_time (conf=__conf__):
"""
cgroups monitoring disabled after expiry time
cgroups monitoring for pilot extensions disabled after expiry time
NOTE: This option is experimental and may be removed in later versions of the Agent.
"""
return conf.get("Debug.CgroupMonitorExpiryTime", "2022-01-31")
return conf.get("Debug.CgroupMonitorExpiryTime", "2022-03-31")

def get_cgroup_monitor_extension_name (conf=__conf__):
"""
Expand Down
4 changes: 4 additions & 0 deletions azurelinuxagent/common/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ class VmSettingsError(ExtensionsGoalStateError):
"""
Error raised when the VmSettings are malformed
"""
def __init__(self, message, etag, vm_settings_text, inner=None):
# message and inner are forwarded unchanged to the parent class constructor.
super(VmSettingsError, self).__init__(message, inner)
# etag and vm_settings_text are stored on the exception so that handlers can read them back
# (e.g. to save the vmSettings that could not be parsed).
self.etag = etag
self.vm_settings_text = vm_settings_text


class MultiConfigExtensionEnableError(ExtensionError):
Expand Down
1 change: 1 addition & 0 deletions azurelinuxagent/common/logcollector.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

CGROUPS_UNIT = "collect-logs.scope"

FORCE_KILLED_ERRCODE = -9
INVALID_CGROUPS_ERRCODE = 2

_MUST_COLLECT_FILES = [
Expand Down
5 changes: 3 additions & 2 deletions azurelinuxagent/common/osutil/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,10 @@ def get_firewall_dropped_packets(self, dst_ip=None):
return int(m.group(1))

except Exception as e:
if isinstance(e, CommandError) and e.returncode == 3: # pylint: disable=E1101
# Transient error that we ignore. This code fires every loop
if isinstance(e, CommandError) and (e.returncode == 3 or e.returncode == 4): # pylint: disable=E1101
# Return code 3 is a transient error that we ignore. This code fires every loop
# of the daemon (60m), so we will get the value eventually.
# Return code 4 is also ignored as a temporary fix (RULE_REPLACE failed (Invalid argument)).
return 0
logger.warn("Failed to get firewall packets: {0}", ustr(e))
return -1
Expand Down
14 changes: 14 additions & 0 deletions azurelinuxagent/common/protocol/extensions_goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@
from azurelinuxagent.common.utils import textutil


# Names of the channels that can be reported as the source of an extensions goal state
# (these are the values returned by the source_channel property of ExtensionsGoalState and its overrides).
class GoalStateChannel(object):
WireServer = "WireServer"
HostGAPlugin = "HostGAPlugin"
# Reported by the source_channel override that returns GoalStateChannel.Empty.
Empty = "Empty"


class ExtensionsGoalState(object):
"""
ExtensionsGoalState represents the extensions information in the goal state; that information can originate from
Expand Down Expand Up @@ -50,6 +56,10 @@ def correlation_id(self):
def created_on_timestamp(self):
raise NotImplementedError()

@property
def source_channel(self):
# Not implemented at this level: always raises NotImplementedError. Subclasses are expected to
# override this property and return one of the GoalStateChannel values.
raise NotImplementedError()

@property
def status_upload_blob(self):
raise NotImplementedError()
Expand Down Expand Up @@ -136,6 +146,10 @@ def correlation_id(self):
def created_on_timestamp(self):
return datetime.datetime.min

@property
def source_channel(self):
# This implementation always reports GoalStateChannel.Empty as the source channel.
return GoalStateChannel.Empty

@property
def status_upload_blob(self):
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from azurelinuxagent.common.event import add_event, WALAEventOperation
from azurelinuxagent.common.exception import ExtensionsConfigError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \
is_str_none_or_whitespace, is_str_empty
Expand All @@ -32,7 +32,7 @@
class ExtensionsGoalStateFromExtensionsConfig(ExtensionsGoalState):
def __init__(self, incarnation, xml_text, wire_client):
super(ExtensionsGoalStateFromExtensionsConfig, self).__init__()
self._id = incarnation
self._id = "incarnation_{0}".format(incarnation)
self._incarnation = incarnation
self._text = xml_text
self._status_upload_blob = None
Expand Down Expand Up @@ -148,6 +148,10 @@ def correlation_id(self):
def created_on_timestamp(self):
return self._created_on_timestamp

@property
def source_channel(self):
# Goal states built from the ExtensionsConfig always report the WireServer channel.
return GoalStateChannel.WireServer

@property
def status_upload_blob(self):
return self._status_upload_blob
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.exception import VmSettingsError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel
from azurelinuxagent.common.protocol.restapi import VMAgentManifest, Extension, ExtensionRequestedState, ExtensionSettings
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion

Expand All @@ -33,7 +34,7 @@ class ExtensionsGoalStateFromVmSettings(ExtensionsGoalState):

def __init__(self, etag, json_text):
super(ExtensionsGoalStateFromVmSettings, self).__init__()
self._id = etag
self._id = "etag_{0}".format(etag)
self._etag = etag
self._text = json_text
self._host_ga_plugin_version = FlexibleVersion('0.0.0.0')
Expand All @@ -53,7 +54,7 @@ def __init__(self, etag, json_text):
self._parse_vm_settings(json_text)
self._do_common_validations()
except Exception as e:
raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e)))
raise VmSettingsError("Error parsing vmSettings [HGAP: {0}]: {1}".format(self._host_ga_plugin_version, ustr(e)), etag, self.get_redacted_text())

@property
def id(self):
Expand Down Expand Up @@ -86,6 +87,10 @@ def created_on_timestamp(self):
"""
return self._created_on_timestamp

@property
def source_channel(self):
# Goal states built from the vmSettings always report the HostGAPlugin channel.
return GoalStateChannel.HostGAPlugin

@property
def source(self):
"""
Expand Down Expand Up @@ -200,13 +205,13 @@ def _parse_required_features(self, vm_settings):
required_features = vm_settings.get("requiredFeatures")
if required_features is not None:
if not isinstance(required_features, list):
raise Exception("requiredFeatures should be an array")
raise Exception("requiredFeatures should be an array (got {0})".format(required_features))

def get_required_features_names():
for feature in required_features:
name = feature.get("name")
if name is None:
raise Exception("A required feature is missing the 'name' property")
raise Exception("A required feature is missing the 'name' property (got {0})".format(feature))
yield name

self._required_features.extend(get_required_features_names())
Expand Down Expand Up @@ -238,7 +243,7 @@ def _parse_agent_manifests(self, vm_settings):
if families is None:
return
if not isinstance(families, list):
raise Exception("gaFamilies should be an array")
raise Exception("gaFamilies should be an array (got {0})".format(families))

for family in families:
name = family["name"]
Expand Down Expand Up @@ -321,7 +326,7 @@ def _parse_extensions(self, vm_settings):
extension_goal_states = vm_settings.get("extensionGoalStates")
if extension_goal_states is not None:
if not isinstance(extension_goal_states, list):
raise Exception("extension_goal_states should be an array")
raise Exception("extension_goal_states should be an array (got {0})".format(type(extension_goal_states))) # report only the type, since the value may contain secrets
for extension_gs in extension_goal_states:
extension = Extension()

Expand All @@ -340,7 +345,7 @@ def _parse_extensions(self, vm_settings):
additional_locations = extension_gs.get('additionalLocations')
if additional_locations is not None:
if not isinstance(additional_locations, list):
raise Exception('additionalLocations should be an array')
raise Exception('additionalLocations should be an array (got {0})'.format(additional_locations))
extension.manifest_uris.extend(additional_locations)

#
Expand Down Expand Up @@ -458,13 +463,18 @@ def _parse_dependency_level(depends_on, extension):
# ...
# }
if not isinstance(depends_on, list):
raise Exception('dependsOn should be an array')
raise Exception('dependsOn should be an array ({0}) (got {1})'.format(extension.name, depends_on))

if not extension.supports_multi_config:
# single-config
if len(depends_on) != 1:
raise Exception('dependsOn should be an array with exactly one item for single-config extensions')
extension.settings[0].dependencyLevel = depends_on[0]['dependencyLevel']
length = len(depends_on)
if length > 1:
raise Exception('dependsOn should be an array with exactly one item for single-config extensions ({0}) (got {1})'.format(extension.name, depends_on))
elif length == 0:
logger.warn('dependsOn is an empty array for extension {0}; setting the dependency level to 0'.format(extension.name))
extension.settings[0].dependencyLevel = 0
else:
extension.settings[0].dependencyLevel = depends_on[0]['dependencyLevel']
else:
# multi-config
settings_by_name = {}
Expand Down
14 changes: 10 additions & 4 deletions azurelinuxagent/common/protocol/goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.datacontract import set_properties
from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError
from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError, VmSettingsError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory
from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported
Expand Down Expand Up @@ -207,20 +207,26 @@ def _fetch_vm_settings(self, force_update=False):
vm_settings, vm_settings_updated = (None, False)

if conf.get_enable_fast_track():
def save_to_history(etag, text):
# The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory
history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), etag)
history.save_vm_settings(text)

try:
vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)

except VmSettingsNotSupported:
pass
except VmSettingsError as exception:
save_to_history(exception.etag, exception.vm_settings_text)
raise
except ResourceGoneError:
# retry after refreshing the HostGAPlugin
GoalState.update_host_plugin_headers(self._wire_client)
vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)

if vm_settings_updated:
# The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory
history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), vm_settings.etag)
history.save_vm_settings(vm_settings.get_redacted_text())
save_to_history(vm_settings.etag, vm_settings.get_redacted_text())

return vm_settings

Expand Down
18 changes: 13 additions & 5 deletions azurelinuxagent/common/protocol/hostplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from azurelinuxagent.common import logger
from azurelinuxagent.common.errorstate import ErrorState, ERROR_STATE_HOST_PLUGIN_FAILURE
from azurelinuxagent.common.event import WALAEventOperation, add_event
from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError
from azurelinuxagent.common.exception import HttpError, ProtocolError, ResourceGoneError, VmSettingsError
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.future import ustr, httpclient
from azurelinuxagent.common.protocol.healthservice import HealthService
Expand Down Expand Up @@ -400,8 +400,11 @@ def fetch_vm_settings(self, force_update):
Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and
a boolean indicating if they are an updated (True) or a cached value (False).
Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, ResourceGoneError if the container ID and roleconfig name
need to be refreshed, or ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error).
Raises
* VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API
* VmSettingsError if the HostGAPlugin returned invalid vmSettings (e.g. syntax error)
* ResourceGoneError if the container ID and roleconfig name need to be refreshed
* ProtocolError if the request fails for any other reason (e.g. not supported, time out, server error)
"""
def raise_not_supported(reset_state=False):
if reset_state:
Expand Down Expand Up @@ -477,8 +480,9 @@ def format_message(msg):
logger.info(message)
add_event(op=WALAEventOperation.HostPlugin, message=message, is_success=True)

# Don't support HostGAPlugin versions older than 115
if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.115"):
# Don't support HostGAPlugin versions older than 1.0.8.117
# TODO: update the minimum version to 1.0.8.123 before release
if vm_settings.host_ga_plugin_version < FlexibleVersion("1.0.8.117"):
raise_not_supported(reset_state=True)

logger.info("Fetched new vmSettings [correlation ID: {0} New eTag: {1}]", correlation_id, vm_settings.etag)
Expand All @@ -488,6 +492,10 @@ def format_message(msg):

except (ProtocolError, ResourceGoneError, VmSettingsNotSupported):
raise
except VmSettingsError as vmSettingsError:
message = format_message(ustr(vmSettingsError))
self._vm_settings_error_reporter.report_error(message)
raise
except Exception as exception:
if isinstance(exception, IOError) and "timed out" in ustr(exception):
message = format_message("Timeout")
Expand Down
8 changes: 8 additions & 0 deletions azurelinuxagent/common/protocol/restapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,14 @@ def __init__(self, status=None, message=None, gs_aggregate_status=None, update_s
self.extensionHandlers = DataContractList(ExtHandlerStatus)
self.vm_artifacts_aggregate_status = VMArtifactsAggregateStatus(gs_aggregate_status)
self.update_status = update_status
self._supports_fast_track = False

@property
def supports_fast_track(self):
# Read-only accessor for the private _supports_fast_track flag.
return self._supports_fast_track

def set_supports_fast_track(self, value):
# Stores value as-is in the private flag read by the supports_fast_track property; no validation is done.
self._supports_fast_track = value


class VMStatus(DataContract):
Expand Down
Loading

0 comments on commit 2b00a1d

Please sign in to comment.