Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge ExtensionsGoalState into GoalState #2490

Merged
merged 15 commits into from
Feb 7, 2022
6 changes: 0 additions & 6 deletions azurelinuxagent/common/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,6 @@ class ProtocolNotFoundError(ProtocolError):
"""


class IncompleteGoalStateError(ProtocolError):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not see the need to have a separate exception for this error so I changed the code to use ProtocolError.

This requires a change in DCR, which looks for IncompleteGoalStateError. I am preparing that change and will submit the PR on the tux repo.

"""
Goal state is returned incomplete.
"""


class HttpError(AgentError):
"""
Http request failure
Expand Down
7 changes: 1 addition & 6 deletions azurelinuxagent/common/logcollector.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,7 @@

_MUST_COLLECT_FILES = [
_AGENT_LOG,
os.path.join(_AGENT_LIB_DIR, "GoalState.*.xml"),
os.path.join(_AGENT_LIB_DIR, "ExtensionsConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "HostingEnvironmentConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "SharedConfig.*.xml"),
os.path.join(_AGENT_LIB_DIR, "*manifest.xml"),
os.path.join(_AGENT_LIB_DIR, "waagent_status.*.json"),
os.path.join(_AGENT_LIB_DIR, "waagent_status.json"),
os.path.join(_AGENT_LIB_DIR, "history", "*.zip"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*"),
os.path.join(_EXTENSION_LOG_DIR, "*", "*", "*"),
Expand Down
15 changes: 4 additions & 11 deletions azurelinuxagent/common/logcollector_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,13 @@
echo,

echo,### Gathering Extension Files ###
copy,$LIB_DIR/*.xml
copy,$LIB_DIR/VmSettings.*.json
copy,$LIB_DIR/waagent_status.*.json
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/*.agentsManifest
copy,$LIB_DIR/error.json
copy,$LIB_DIR/Incarnation
copy,$LIB_DIR/history/*.zip
echo,
"""
Expand Down Expand Up @@ -108,19 +105,15 @@
echo,

echo,### Gathering Extension Files ###
copy,$LIB_DIR/ExtensionsConfig.*.xml
copy,$LIB_DIR/ovf-env.xml
copy,$LIB_DIR/*/status/*.status
copy,$LIB_DIR/*/config/*.settings
copy,$LIB_DIR/*/config/HandlerState
copy,$LIB_DIR/*/config/HandlerStatus
copy,$LIB_DIR/GoalState.*.xml
copy,$LIB_DIR/HostingEnvironmentConfig.xml
copy,$LIB_DIR/*.manifest.xml
copy,$LIB_DIR/SharedConfig.xml
copy,$LIB_DIR/ManagedIdentity-*.json
copy,$LIB_DIR/*/error.json
copy,$LIB_DIR/Incarnation
copy,$LIB_DIR/waagent_status.*.json
copy,$LIB_DIR/waagent_status.json
copy,$LIB_DIR/history/*.zip
echo,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def correlation_id(self):

@property
def created_on_timestamp(self):
"""
Timestamp assigned by the CRP (time at which the Fast Track goal state was created)
"""
return self._created_on_timestamp

@property
Expand Down
266 changes: 213 additions & 53 deletions azurelinuxagent/common/protocol/goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# limitations under the License.
#
# Requires Python 2.6+ and Openssl 1.0+

import datetime
import os
import re
import time
Expand All @@ -24,12 +24,13 @@
import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.datacontract import set_properties
from azurelinuxagent.common.exception import IncompleteGoalStateError
from azurelinuxagent.common.exception import ProtocolError
from azurelinuxagent.common.exception import ProtocolError, ResourceGoneError
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory
from azurelinuxagent.common.protocol.hostplugin import VmSettingsNotSupported
from azurelinuxagent.common.protocol.restapi import Cert, CertList, RemoteAccessUser, RemoteAccessUsersList
from azurelinuxagent.common.utils import fileutil
from azurelinuxagent.common.utils.archive import GoalStateHistory
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, findtext, getattrib

Expand All @@ -40,81 +41,240 @@
TRANSPORT_CERT_FILE_NAME = "TransportCert.pem"
TRANSPORT_PRV_FILE_NAME = "TransportPrivate.pem"

_NUM_GS_FETCH_RETRIES = 6
_GET_GOAL_STATE_MAX_ATTEMPTS = 6


class GoalState(object):
def __init__(self, wire_client):
"""
Fetches the goal state using the given wire client.

__init__ fetches only the goal state itself, not including inner properties such as ExtensionsConfig; to fetch the entire goal state
use the fetch_full_goal_state().
Fetching the goal state involves several HTTP requests to the WireServer and the HostGAPlugin. There is an initial request to WireServer's goalstate API,
whose response includes the incarnation, role instance, container ID, role config, and URIs to the rest of the goal state (ExtensionsConfig, Certificates,
Remote Access users, etc.). Additional requests are done using those URIs (all of them point to APIs in the WireServer). Additionally, there is a
request to the HostGAPlugin for the vmSettings, which determines the goal state for extensions when using the Fast Track pipeline.

To reduce the number of requests, when possible, create a single instance of GoalState and use the update() method to keep it up to date.
"""
Copy link
Member Author

@narrieta narrieta Feb 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now instantiating GoalState fetches the entire goal state (minus the agent/extension manifests). Looking at the usages of the goal state, there is no need for separate methods to request the goal state and then the extensions config, etc. The fetch_full_goal_state() method is now gone.

The only reason to invoke just the goalstate API (without fetching the rest of the goal state) was to update the headers used by the HostGAPlugin. I created a separate method for that (update_host_plugin_headers).

try:
self._wire_client = wire_client

# These "basic" properties come from the initial request to WireServer's goalstate API
self._timestamp = None
self._incarnation = None
self._role_instance_id = None
self._role_config_name = None
self._container_id = None

xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client)

self._initialize_basic_properties(xml_doc)

# The goal state for extensions can come from vmSettings when using FastTrack or from extensionsConfig otherwise, self._fetch_extended_goal_state
# populates the '_extensions' property.
self._extensions = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this being used?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, good catch! This was meant to be '_extensions_goal_state', but I did not remove the old variable ('_extensions')

vm_settings = self._fetch_vm_settings()

# These "extended" properties come from additional HTTP requests to the URIs included in the basic goal state
self._hosting_env = None
self._shared_conf = None
self._certs = None
self._remote_access = None
self._extensions_goal_state = None

self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings)

except Exception as exception:
# We don't log the error here since fetching the goal state is done every few seconds
raise ProtocolError(msg="Error fetching goal state", inner=exception)

@property
def timestamp(self):
return self._timestamp

@property
def incarnation(self):
return self._incarnation

@property
def container_id(self):
return self._container_id

@property
def role_instance_id(self):
return self._role_instance_id

@property
def role_config_name(self):
return self._role_config_name

@property
def extensions_goal_state(self):
return self._extensions_goal_state

@property
def certs(self):
return self._certs

@property
def hosting_env(self):
return self._hosting_env

@property
def shared_conf(self):
return self._shared_conf

@property
def remote_access(self):
return self._remote_access

@staticmethod
def update_host_plugin_headers(wire_client):
"""
Updates the container ID and role config name that are sent in the headers of HTTP requests to the HostGAPlugin
"""
# Fetching the goal state updates the HostGAPlugin so simply trigger the request
GoalState._fetch_goal_state(wire_client)

def update(self, force_update=False):
"""
Updates the current GoalState instance fetching values from the WireServer/HostGAPlugin as needed
"""
xml_text, xml_doc = GoalState._fetch_goal_state(self._wire_client)

vm_settings = self._fetch_vm_settings(force_update=force_update)

if force_update or self._incarnation != findtext(xml_doc, "Incarnation"):
# update the extended goal state, using vm_settings for the extensions (unless they are None, then use extensionsConfig)
self._initialize_basic_properties(xml_doc)
self._fetch_extended_goal_state(xml_text, xml_doc, vm_settings)
else:
# else just ensure the extensions are using the latest vm_settings
if vm_settings is not None:
self._extensions_goal_state = vm_settings

def save_to_history(self, data, file_name):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a temporary method while I do more refactoring on the methods to fetch the manifests

self._history.save(data, file_name)

def _initialize_basic_properties(self, xml_doc):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There weren't any changes in how we parse the goal state, so the code in this method and in _fetch_extended_goal_state was just moved here from elsewhere.

self._timestamp = datetime.datetime.utcnow().isoformat()
self._incarnation = findtext(xml_doc, "Incarnation")
self._history = GoalStateHistory(self._timestamp, self._incarnation) # history for the WireServer goal state; vmSettings are separate
role_instance = find(xml_doc, "RoleInstance")
self._role_instance_id = findtext(role_instance, "InstanceId")
role_config = find(role_instance, "Configuration")
self._role_config_name = findtext(role_config, "ConfigName")
container = find(xml_doc, "Container")
self._container_id = findtext(container, "ContainerId")

@staticmethod
def _fetch_goal_state(wire_client):
"""
Issues an HTTP request for the goal state (WireServer) and returns a tuple containing the response as text and as an XML Document
"""
uri = GOAL_STATE_URI.format(wire_client.get_endpoint())

for _ in range(0, _NUM_GS_FETCH_RETRIES):
self.xml_text = wire_client.fetch_config(uri, wire_client.get_header())
xml_doc = parse_doc(self.xml_text)
self.incarnation = findtext(xml_doc, "Incarnation")
# In some environments a few goal state requests return a missing RoleInstance; these retries are used to work around that issue
# TODO: Consider retrying on 410 (ResourceGone) as well
for _ in range(0, _GET_GOAL_STATE_MAX_ATTEMPTS):
xml_text = wire_client.fetch_config(uri, wire_client.get_header())
xml_doc = parse_doc(xml_text)

role_instance = find(xml_doc, "RoleInstance")
if role_instance:
break
time.sleep(0.5)
else:
raise IncompleteGoalStateError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=self.incarnation))
incarnation = findtext(xml_doc, "Incarnation")
raise ProtocolError("Fetched goal state without a RoleInstance [incarnation {inc}]".format(inc=incarnation))

try:
self.role_instance_id = findtext(role_instance, "InstanceId")
role_config = find(role_instance, "Configuration")
self.role_config_name = findtext(role_config, "ConfigName")
container = find(xml_doc, "Container")
self.container_id = findtext(container, "ContainerId")

AgentGlobals.update_container_id(self.container_id)

# these properties are populated by fetch_full_goal_state()
self._hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig")
self.hosting_env = None
self._shared_conf_uri = findtext(xml_doc, "SharedConfig")
self.shared_conf = None
self._certs_uri = findtext(xml_doc, "Certificates")
self.certs = None
self._remote_access_uri = findtext(container, "RemoteAccessInfo")
self.remote_access = None
# TODO: extensions_config is an instance member only temporarily. Once we stop comparing extensionsConfig with
# vmSettings, it will be replaced with the extensions goal state
self.extensions_config = None
self._extensions_config_uri = findtext(xml_doc, "ExtensionsConfig")
# Telemetry and the HostGAPlugin depend on the container id/role config; keep them up-to-date each time we fetch the goal state
# (note that these elements can change even if the incarnation of the goal state does not change)
container = find(xml_doc, "Container")
container_id = findtext(container, "ContainerId")
role_config = find(role_instance, "Configuration")
role_config_name = findtext(role_config, "ConfigName")

except Exception as exception:
# We don't log the error here since fetching the goal state is done every few seconds
raise ProtocolError(msg="Error fetching goal state", inner=exception)
AgentGlobals.update_container_id(container_id) # Telemetry uses this global to pick up the container id

def fetch_full_goal_state(self, wire_client):
try:
logger.info('Fetching goal state [incarnation {0}]', self.incarnation)
wire_client.update_host_plugin(container_id, role_config_name)

return xml_text, xml_doc

def _fetch_vm_settings(self, force_update=False):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The extensions goal state is now a property of GoalState, so we fetch it here.

"""
Issues an HTTP request (HostGAPlugin) for the vm settings and returns the response as an ExtensionsGoalStateFromVmSettings.
"""
vm_settings, vm_settings_updated = (None, False)

if conf.get_enable_fast_track():
try:
vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)

except VmSettingsNotSupported:
pass
except ResourceGoneError:
# retry after refreshing the HostGAPlugin
GoalState.update_host_plugin_headers(self._wire_client)
vm_settings, vm_settings_updated = self._wire_client.get_host_plugin().fetch_vm_settings(force_update=force_update)

if vm_settings_updated:
# The vmSettings are updated independently of the WireServer goal state and they are saved to a separate directory
history = GoalStateHistory(datetime.datetime.utcnow().isoformat(), vm_settings.etag)
history.save_vm_settings(vm_settings.get_redacted_text())

xml_text = wire_client.fetch_config(self._hosting_env_uri, wire_client.get_header())
self.hosting_env = HostingEnv(xml_text)
return vm_settings

xml_text = wire_client.fetch_config(self._shared_conf_uri, wire_client.get_header())
self.shared_conf = SharedConfig(xml_text)
def _fetch_extended_goal_state(self, xml_text, xml_doc, vm_settings):
"""
Issues HTTP requests (WireServer) for each of the URIs in the goal state (ExtensionsConfig, Certificate, Remote Access users, etc)
and populates the corresponding properties. If the give 'vm_settings' are not None they are used for the extensions goal state,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and populates the corresponding properties. If the give 'vm_settings' are not None they are used for the extensions goal state,

Suggested change
and populates the corresponding properties. If the give 'vm_settings' are not None they are used for the extensions goal state,
and populates the corresponding properties. If the given 'vm_settings' are not None they are used for the extensions goal state,

otherwise extensionsConfig is used instead.
"""
try:
logger.info('Fetching goal state [incarnation {0}]', self._incarnation)

if self._certs_uri is not None:
xml_text = wire_client.fetch_config(self._certs_uri, wire_client.get_header_for_cert())
self.certs = Certificates(xml_text)
self._history.save_goal_state(xml_text)
Copy link
Member Author

@narrieta narrieta Feb 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now we save the goal state to the history folder as we fetch it, instead of saving to /var/lib/waagent and then flushing it to the history folder when the incarnation changes

This requires an update in DCR, where the tests for the status blob are looking for ExtensionsConfig.*.xml in the agent's dir. I'll post the corresponding PR in the tux repo.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sample history directory:

Length Name
------ ----
       2022-02-03T00_43_06.228674_1    <=== WS goal state
       2022-02-03T00_43_07.544229     <=== status
       2022-02-03T00_44_13.783542_1310808759485798297   <=== HostGAPlugin goal state
       2022-02-03T00_44_13.783997_2
       2022-02-03T00_44_18.097053
       2022-02-03T00_44_48.209362_5216182003277442798
       2022-02-03T00_44_48.209790_3
       2022-02-03T00_44_50.330195
       2022-02-03T00_45_20.442170_4495420063731861706
       2022-02-03T00_45_20.442600_4
       2022-02-03T00_45_24.596102
       2022-02-03T00_45_48.671177_14329981429087073333
       2022-02-03T00_45_48.671655_5
       2022-02-03T00_45_52.837152
       2022-02-03T00_46_22.944478_9343934122256085228
       2022-02-03T00_46_22.944892_6
       2022-02-03T00_46_27.116754
  2993 2022-02-03T00_43_04.659319_1.zip
   785 2022-02-03T00_43_04.724605_12843543198617625598.zip
  2993 2022-02-03T00_43_06.228674_1.zip
   785 2022-02-03T00_43_06.235001_12843543198617625598.zip


if self._remote_access_uri is not None:
xml_text = wire_client.fetch_config(self._remote_access_uri, wire_client.get_header_for_cert())
self.remote_access = RemoteAccess(xml_text)
# TODO: at this point we always fetch the extensionsConfig, even if it is not needed, and save it for debugging purposes. Once
# FastTrack is stable this code can be updated to fetch it only when actually needed.
extensions_config_uri = findtext(xml_doc, "ExtensionsConfig")

if self._extensions_config_uri is None:
self.extensions_config = ExtensionsGoalStateFactory.create_empty()
if extensions_config_uri is None:
extensions_config = ExtensionsGoalStateFactory.create_empty()
else:
xml_text = wire_client.fetch_config(self._extensions_config_uri, wire_client.get_header())
self.extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self.incarnation, xml_text, wire_client)
xml_text = self._wire_client.fetch_config(extensions_config_uri, self._wire_client.get_header())
extensions_config = ExtensionsGoalStateFactory.create_from_extensions_config(self._incarnation, xml_text, self._wire_client)
self._history.save_extensions_config(extensions_config.get_redacted_text())

if vm_settings is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't you hit the 'not defined' error

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, I don't get your comment

vm_settings is a parameter... did you mean this variable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NM, I overlooked it. I thought it was not defined in the function, but it's a function parameter.

self._extensions_goal_state = vm_settings
else:
self._extensions_goal_state = extensions_config

hosting_env_uri = findtext(xml_doc, "HostingEnvironmentConfig")
xml_text = self._wire_client.fetch_config(hosting_env_uri, self._wire_client.get_header())
self._hosting_env = HostingEnv(xml_text)
self._history.save_hosting_env(xml_text)

shared_conf_uri = findtext(xml_doc, "SharedConfig")
xml_text = self._wire_client.fetch_config(shared_conf_uri, self._wire_client.get_header())
self._shared_conf = SharedConfig(xml_text)
self._history.save_shared_conf(xml_text)

certs_uri = findtext(xml_doc, "Certificates")
if certs_uri is not None:
# Note that we do not save the certificates to the goal state history
xml_text = self._wire_client.fetch_config(certs_uri, self._wire_client.get_header_for_cert())
self._certs = Certificates(xml_text)

container = find(xml_doc, "Container")
remote_access_uri = findtext(container, "RemoteAccessInfo")
if remote_access_uri is not None:
xml_text = self._wire_client.fetch_config(remote_access_uri, self._wire_client.get_header_for_cert())
self._remote_access = RemoteAccess(xml_text)
self._history.save_remote_access(xml_text)

except Exception as exception:
logger.warn("Fetching the goal state failed: {0}", ustr(exception))
raise ProtocolError(msg="Error fetching goal state", inner=exception)
Expand Down
Loading