-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rename to management.Manager, add UpdateStatus to Manager interface. #19114
Changes from all commits
64ffc12
725505d
5ab1334
2e8affe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ import ( | |
"fmt" | ||
"os" | ||
"sort" | ||
"sync" | ||
|
||
"github.com/gofrs/uuid" | ||
"github.com/pkg/errors" | ||
|
@@ -35,12 +36,15 @@ type Manager struct { | |
registry *reload.Registry | ||
blacklist *xmanagement.ConfigBlacklist | ||
client *client.Client | ||
lock sync.Mutex | ||
status management.Status | ||
msg string | ||
|
||
stopFunc func() | ||
} | ||
|
||
// NewFleetManager returns a X-Pack Beats Fleet Management manager. | ||
func NewFleetManager(config *common.Config, registry *reload.Registry, beatUUID uuid.UUID) (management.ConfigManager, error) { | ||
func NewFleetManager(config *common.Config, registry *reload.Registry, beatUUID uuid.UUID) (management.Manager, error) { | ||
c := defaultConfig() | ||
if config.Enabled() { | ||
if err := config.Unpack(&c); err != nil { | ||
|
@@ -51,7 +55,7 @@ func NewFleetManager(config *common.Config, registry *reload.Registry, beatUUID | |
} | ||
|
||
// NewFleetManagerWithConfig returns a X-Pack Beats Fleet Management manager. | ||
func NewFleetManagerWithConfig(c *Config, registry *reload.Registry, beatUUID uuid.UUID) (management.ConfigManager, error) { | ||
func NewFleetManagerWithConfig(c *Config, registry *reload.Registry, beatUUID uuid.UUID) (management.Manager, error) { | ||
log := logp.NewLogger(management.DebugK) | ||
|
||
m := &Manager{ | ||
|
@@ -122,38 +126,51 @@ func (cm *Manager) CheckRawConfig(cfg *common.Config) error { | |
return nil | ||
} | ||
|
||
// UpdateStatus updates the manager with the current status for the beat. | ||
func (cm *Manager) UpdateStatus(status management.Status, msg string) { | ||
cm.lock.Lock() | ||
defer cm.lock.Unlock() | ||
|
||
if cm.status != status || cm.msg != msg { | ||
cm.status = status | ||
cm.msg = msg | ||
cm.client.Status(statusToProtoStatus(status), msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here we update status on the client which will result in some action either during checkin or in watcher after some timeout. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This flow is valid from the side of the Elastic Agent, as reporting Failed will cause the application to be killed and restarted. This is fully tested in covered in the Elastic Agent unit tests, both in the case that the application returns Failed over the protocol and if the application just crashes. |
||
cm.logger.Infof("Status change to %s: %s", status, msg) | ||
} | ||
} | ||
|
||
func (cm *Manager) OnConfig(s string) { | ||
cm.client.Status(proto.StateObserved_CONFIGURING, "Updating configuration") | ||
cm.UpdateStatus(management.Configuring, "Updating configuration") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we switch back after config is applied or do we let beat to do that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does switch back to Running at the end of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you;re right it was hidden so i overlooked There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I read this right OnConfig sets the status to 'Healthy' at the end. Are we sure this is the correct state to report? Do we 'forget' the internal state here? How about removing the 'Configuring' status? The less states (and protocol behavior) we have, the less we can get it wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally do not like the idea of setting the status in the At the moment I keep it this way so it works, and once status is being reported correctly by the beat, this should be removed (probably at the same time). I do not think we should removing It is good to know that going There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
+1 |
||
|
||
var configMap common.MapStr | ||
uconfig, err := common.NewConfigFrom(s) | ||
if err != nil { | ||
err = errors.Wrap(err, "config blocks unsuccessfully generated") | ||
cm.logger.Error(err) | ||
cm.client.Status(proto.StateObserved_FAILED, err.Error()) | ||
cm.UpdateStatus(management.Failed, err.Error()) | ||
return | ||
} | ||
|
||
err = uconfig.Unpack(&configMap) | ||
if err != nil { | ||
err = errors.Wrap(err, "config blocks unsuccessfully generated") | ||
cm.logger.Error(err) | ||
cm.client.Status(proto.StateObserved_FAILED, err.Error()) | ||
cm.UpdateStatus(management.Failed, err.Error()) | ||
return | ||
} | ||
|
||
blocks, err := cm.toConfigBlocks(configMap) | ||
if err != nil { | ||
err = errors.Wrap(err, "could not apply the configuration") | ||
cm.logger.Error(err) | ||
cm.client.Status(proto.StateObserved_FAILED, err.Error()) | ||
cm.UpdateStatus(management.Failed, err.Error()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given all these errors I wonder what 'Failed' means. Is the beat really 'failed' or are we (the overall application with agent) running in a degraded mode because not all configurations could have been applied? Even if we fail here the Beat continues to publish events for existing inputs, right? Are we mixing the status of the 'operation' with the beat health here? If the operation fails, are we required to restart the Beat? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As stated in the list of statuses there is
In Degarded state beat can keep sending events and agent will not stop the process. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I've seen these status types. Is this method run on startup only, or also if the beat is already active? In the later case the Beat will continue with the old state if there was an error decoding the configuration. Do we want to enforce a 'restart' if the config update operation failed? Will
I'm more or less wondering about the overal semantics of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It will run both on startup and while running. The beat doesn't need to worry about handling the restart on Failed status, the Agent will do that work for it, restart it and bring it back up. |
||
return | ||
} | ||
|
||
if errs := cm.apply(blocks); !errs.IsEmpty() { | ||
err = errors.Wrap(err, "could not apply the configuration") | ||
cm.logger.Error(err) | ||
cm.client.Status(proto.StateObserved_FAILED, err.Error()) | ||
cm.UpdateStatus(management.Failed, err.Error()) | ||
return | ||
} | ||
|
||
|
@@ -285,3 +302,25 @@ func (cm *Manager) toConfigBlocks(cfg common.MapStr) (api.ConfigBlocks, error) { | |
|
||
return res, nil | ||
} | ||
|
||
func statusToProtoStatus(status management.Status) proto.StateObserved_Status { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add unit test to test switching statuses back and forth with some noop client? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment I don't believe client from |
||
switch status { | ||
case management.Unknown: | ||
// unknown is reported as healthy, as the status is unknown | ||
return proto.StateObserved_HEALTHY | ||
case management.Starting: | ||
return proto.StateObserved_STARTING | ||
case management.Configuring: | ||
return proto.StateObserved_CONFIGURING | ||
case management.Running: | ||
return proto.StateObserved_HEALTHY | ||
case management.Degraded: | ||
return proto.StateObserved_DEGRADED | ||
case management.Failed: | ||
return proto.StateObserved_FAILED | ||
case management.Stopping: | ||
return proto.StateObserved_STOPPING | ||
} | ||
// unknown status, still reported as healthy | ||
return proto.StateObserved_HEALTHY | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we need this here if this is nil manager - noop manager?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do it only for logging the status changes. I think it is good to show status updates in the logger as well, so it's clear what is happening in the beat, even if its not controlled by Elastic Agent.