-
Notifications
You must be signed in to change notification settings - Fork 81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Keep looking for master process #617
Changes from 11 commits
7156fcd
317da66
fe59ae9
1b8b468
21819a3
1936f20
7f97955
9f37147
b89cf0e
794ff64
ac725e2
cff923c
a3506c5
799bfd7
f72e5ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -29,6 +29,7 @@ const ( | |||||
dataplaneSoftwareDetailsMaxWaitTime = time.Duration(5 * time.Second) | ||||||
// Time between attempts to gather DataplaneSoftwareDetails | ||||||
softwareDetailsOperationInterval = time.Duration(1 * time.Second) | ||||||
nginxStartMaxWaitTime = 0 | ||||||
) | ||||||
|
||||||
type OneTimeRegistration struct { | ||||||
|
@@ -93,13 +94,16 @@ func (r *OneTimeRegistration) Process(msg *core.Message) { | |||||
defer r.dataplaneSoftwareDetailsMutex.Unlock() | ||||||
r.dataplaneSoftwareDetails[data.GetPluginName()] = data.GetDataplaneSoftwareDetails() | ||||||
} | ||||||
case msg.Exact(core.NginxDetailProcUpdate): | ||||||
r.processes = msg.Data().([]*core.Process) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would add a type check here, to ensure that the payload of the message is of the correct type (see the previous If the payload were to contain something other than |
||||||
} | ||||||
} | ||||||
|
||||||
func (r *OneTimeRegistration) Subscriptions() []string { | ||||||
return []string{ | ||||||
core.RegistrationCompletedTopic, | ||||||
core.DataplaneSoftwareDetailsUpdated, | ||||||
core.NginxDetailProcUpdate, | ||||||
} | ||||||
} | ||||||
|
||||||
|
@@ -136,7 +140,6 @@ func (r *OneTimeRegistration) areDataplaneSoftwareDetailsReady() error { | |||||
log.Trace("No extension plugins to register") | ||||||
return nil | ||||||
} | ||||||
|
||||||
r.dataplaneSoftwareDetailsMutex.Lock() | ||||||
defer r.dataplaneSoftwareDetailsMutex.Unlock() | ||||||
|
||||||
|
@@ -153,23 +156,43 @@ func (r *OneTimeRegistration) areDataplaneSoftwareDetailsReady() error { | |||||
func (r *OneTimeRegistration) registerAgent() { | ||||||
var details []*proto.NginxDetails | ||||||
|
||||||
for _, proc := range r.processes { | ||||||
// only need master process for registration | ||||||
if proc.IsMaster { | ||||||
nginxDetails := r.binary.GetNginxDetailsFromProcess(proc) | ||||||
details = append(details, nginxDetails) | ||||||
// Reading nginx config during registration to populate nginx fields like access/error logs, etc. | ||||||
_, err := r.binary.ReadConfig(nginxDetails.GetConfPath(), nginxDetails.NginxId, r.env.GetSystemUUID()) | ||||||
if err != nil { | ||||||
log.Warnf("Unable to read config for NGINX instance %s, %v", nginxDetails.NginxId, err) | ||||||
backoffSetting := backoff.BackoffSettings{ | ||||||
InitialInterval: softwareDetailsOperationInterval, | ||||||
MaxInterval: softwareDetailsOperationInterval, | ||||||
MaxElapsedTime: nginxStartMaxWaitTime, | ||||||
Jitter: backoff.BACKOFF_JITTER, | ||||||
Multiplier: backoff.BACKOFF_MULTIPLIER, | ||||||
} | ||||||
|
||||||
findMaster := func() error { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
for _, proc := range r.processes { | ||||||
// only need master process for registration | ||||||
if proc.IsMaster { | ||||||
nginxDetails := r.binary.GetNginxDetailsFromProcess(proc) | ||||||
details = append(details, nginxDetails) | ||||||
if len(details) > 0 { | ||||||
// Reading nginx config during registration to populate nginx fields like access/error logs, etc. | ||||||
_, err := r.binary.ReadConfig(nginxDetails.GetConfPath(), nginxDetails.NginxId, r.env.GetSystemUUID()) | ||||||
if err != nil { | ||||||
log.Warnf("Unable to read config for NGINX instance %s, %v", nginxDetails.NginxId, err) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that this line isn't originally your code, but I'm wondering if we should actually be returning the error here instead of merely logging it 🤔 @oliveromahony, @dhurley, @Dean-Coakley, @aphralG thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the reason we dont return an error here is because we dont want to stop registration just because we cant read the config. If the config is invalid it should not stop the agent from performing other tasks like reporting the status of NGINX is that makes sense. |
||||||
} | ||||||
log.Info("Master process has been found") | ||||||
return nil | ||||||
} | ||||||
} else { | ||||||
log.Tracef("NGINX non-master process: %d", proc.Pid) | ||||||
return nil | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This |
||||||
} | ||||||
} else { | ||||||
log.Tracef("NGINX non-master process: %d", proc.Pid) | ||||||
} | ||||||
return fmt.Errorf("No master process found, waiting...") | ||||||
} | ||||||
if len(details) == 0 { | ||||||
log.Info("No master process found") | ||||||
err := backoff.WaitUntil( | ||||||
context.Background(), backoffSetting, findMaster, | ||||||
) | ||||||
if err != nil { | ||||||
log.Warn(err.Error()) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was previously:
But my instinct is that this should be an error log, as I'd think we can't complete registration if we haven't found a master process. Others could also weigh in on this, as I wouldn't consider myself an expert on registration. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I believe previously we would continue with registration even if we didn't find master processes but now that we are waiting until we find one I think it should be an error log message.
|
||||||
} | ||||||
|
||||||
updated, err := types.TimestampProto(r.config.Updated) | ||||||
if err != nil { | ||||||
log.Warnf("failed to parse proto timestamp %s: %s, assuming now", r.config.Updated, err) | ||||||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would this not essentially turn off backoff altogether, if the
MaxElapsedTime
is zero? If that is intentional, then I would maybe adding a comment explaining why it is turned off.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When the max wait time is set to 0 backoff will continue waiting indefinitely until the master process is found. I'll add in a comment now