From c6c8cd7dc97d760fa949399ba50f5ffb025be6cf Mon Sep 17 00:00:00 2001 From: Richard Sherman <23365258+RikSherman@users.noreply.github.com> Date: Thu, 6 Apr 2023 09:00:27 +0100 Subject: [PATCH 1/4] BDOG-2534 Fix large profiles failing to start --- cli/cli.go | 6 ++++-- servicemanager/startfromsource.go | 23 +++++++++++++---------- servicemanager/startservice.go | 15 +++++++++++++-- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index 8b9613a..e7d228e 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -44,6 +44,7 @@ type UserOption struct { Verify bool // checks if a given service or profile is running Wait int // waits given number of secs after starting services for then to respond to pings Workers int // sets the number of concurrent downloads/service starts + Delay int // sets the pause in seconds between starting services } func Parse(args []string) (*UserOption, error) { @@ -115,8 +116,8 @@ func fixupInvalidFlags(args []string) []string { } /* - Parses extra args for all the services. Expected format is: - {"SERVICE_NAME":["-DFoo=Bar","SOMETHING"],"SERVICE_TWO":["APPEND_THIS"]} +Parses extra args for all the services. 
Expected format is: +{"SERVICE_NAME":["-DFoo=Bar","SOMETHING"],"SERVICE_TWO":["APPEND_THIS"]} */ func parseAppendArgs(jsonArgs string) (map[string][]string, error) { @@ -183,6 +184,7 @@ func buildFlagSet(opts *UserOption) *flag.FlagSet { flagset.BoolVar(&opts.Verify, "verify", false, "for scripts, checks if a service/profile is running") flagset.IntVar(&opts.Wait, "wait", 0, "used with --start, waits a specified number of seconds for the services to become available before exiting (use with --start)") flagset.IntVar(&opts.Workers, "workers", defaultWorkers(), "how many services should be downloaded at the same time (use with --start)") + flagset.IntVar(&opts.Delay, "delay", 0, "how long to pause after starting a service before starting another") return flagset } diff --git a/servicemanager/startfromsource.go b/servicemanager/startfromsource.go index 9f07a94..d3456de 100644 --- a/servicemanager/startfromsource.go +++ b/servicemanager/startfromsource.go @@ -43,7 +43,9 @@ func (sm *ServiceManager) StartFromSource(serviceName string) error { return err } - return sm.Ledger.SaveStateFile(installDir, state) + err = sm.Ledger.SaveStateFile(installDir, state) + sm.pauseTillHealthy(state.HealthcheckUrl) + return err } func (sm *ServiceManager) installFromGit(installDir string, gitUrl string, service Service) (ledger.InstallFile, error) { @@ -96,17 +98,18 @@ func (sm ServiceManager) sbtBuildAndRun(srcDir string, service Service) (ledger. 
return state, err } + healthcheckUrl := findHealthcheckUrl(service, state.Port) state = ledger.StateFile{ - Service: service.Id, - Artifact: service.Binary.Artifact, - Version: SOURCE, - Path: srcDir, - Started: time.Now(), - Pid: cmd.Process.Pid, - Port: port, - Args: args, + Service: service.Id, + Artifact: service.Binary.Artifact, + Version: SOURCE, + Path: srcDir, + Started: time.Now(), + Pid: cmd.Process.Pid, + Port: port, + Args: args, + HealthcheckUrl: healthcheckUrl, } - return state, nil } diff --git a/servicemanager/startservice.go b/servicemanager/startservice.go index cc2b4b7..1d7b146 100644 --- a/servicemanager/startservice.go +++ b/servicemanager/startservice.go @@ -90,9 +90,20 @@ func (sm *ServiceManager) StartService(serviceAndVersion ServiceAndVersion) erro return err } state.HealthcheckUrl = healthcheckUrl - // and finally, we record out success - return sm.Ledger.SaveStateFile(installDir, state) + err = sm.Ledger.SaveStateFile(installDir, state) + sm.pauseTillHealthy(healthcheckUrl) + return err +} + +func (sm *ServiceManager) pauseTillHealthy(healthcheckUrl string) { + if sm.Commands.Delay > 0 { + count := 0 + for count < sm.Commands.Delay*2 && !sm.CheckHealth(healthcheckUrl) { + count++ + time.Sleep(500 * time.Millisecond) + } + } } func (sm *ServiceManager) installService(installDir string, serviceId string, group string, artifact string, version string) (ledger.InstallFile, error) { From 96fcaa3811ba4310c2dbaa919b69a5729ac382b6 Mon Sep 17 00:00:00 2001 From: Richard Sherman <23365258+RikSherman@users.noreply.github.com> Date: Thu, 6 Apr 2023 10:56:18 +0100 Subject: [PATCH 2/4] BDOG-2534 make delay units explicit --- cli/cli.go | 4 ++-- servicemanager/startservice.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index e7d228e..6b36289 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -44,7 +44,7 @@ type UserOption struct { Verify bool // checks if a given service or profile is running Wait int // 
waits given number of secs after starting services for then to respond to pings Workers int // sets the number of concurrent downloads/service starts - Delay int // sets the pause in seconds between starting services + DelaySeconds int // sets the pause in seconds between starting services } func Parse(args []string) (*UserOption, error) { @@ -184,7 +184,7 @@ func buildFlagSet(opts *UserOption) *flag.FlagSet { flagset.BoolVar(&opts.Verify, "verify", false, "for scripts, checks if a service/profile is running") flagset.IntVar(&opts.Wait, "wait", 0, "used with --start, waits a specified number of seconds for the services to become available before exiting (use with --start)") flagset.IntVar(&opts.Workers, "workers", defaultWorkers(), "how many services should be downloaded at the same time (use with --start)") - flagset.IntVar(&opts.Delay, "delay", 0, "how long to pause after starting a service before starting another") + flagset.IntVar(&opts.DelaySeconds, "delay", 0, "how long to pause, in seconds, after starting a service before starting another") return flagset } diff --git a/servicemanager/startservice.go b/servicemanager/startservice.go index 1d7b146..f5c3827 100644 --- a/servicemanager/startservice.go +++ b/servicemanager/startservice.go @@ -97,9 +97,9 @@ func (sm *ServiceManager) StartService(serviceAndVersion ServiceAndVersion) erro } func (sm *ServiceManager) pauseTillHealthy(healthcheckUrl string) { - if sm.Commands.Delay > 0 { + if sm.Commands.DelaySeconds > 0 { count := 0 - for count < sm.Commands.Delay*2 && !sm.CheckHealth(healthcheckUrl) { + for count < sm.Commands.DelaySeconds*2 && !sm.CheckHealth(healthcheckUrl) { count++ time.Sleep(500 * time.Millisecond) } From b5b5bcd75d259dbe2695570792e64eb6800f6d96 Mon Sep 17 00:00:00 2001 From: Richard Sherman <23365258+RikSherman@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:10:02 +0100 Subject: [PATCH 3/4] BDOG-2534 make delay units explicit --- README.md | 20 ++++++++++++++++---- 1 file changed, 16 
insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 50f7d75..dd39aca 100644 --- a/README.md +++ b/README.md @@ -7,21 +7,26 @@ It's based on the the original [service-manager](https://github.com/hmrc/service ### Installing From Binary 1. Run the following command in your terminal for your operating system/cpu: -**Linux** +**Linux Intel** ```base -curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.6/sm2-1.0.6-linux-intel.zip && unzip sm2-1.0.6-linux-intel.zip && rm sm2-1.0.6-linux-intel.zip +curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.7/sm2-1.0.6-linux-intel.zip && unzip sm2-1.0.6-linux-intel.zip && rm sm2-1.0.6-linux-intel.zip +``` + +**Linux Arm64** +```base +curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.7/sm2-1.0.6-linux-arm64.zip && unzip sm2-1.0.6-linux-arm64.zip && rm sm2-1.0.6-linux-arm64.zip ``` **OSX/Apple (latest M1/M2 cpus)** ```base -curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.6/sm2-1.0.6-apple-arm64.zip && unzip sm2-1.0.6-apple-arm64.zip && rm sm2-1.0.6-apple-arm64.zip +curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.7/sm2-1.0.6-apple-arm64.zip && unzip sm2-1.0.6-apple-arm64.zip && rm sm2-1.0.6-apple-arm64.zip ``` **OSX/Apple (older Intel cpus)** ```base -curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.6/sm2-1.0.6-apple-intel.zip && unzip sm2-1.0.6-apple-intel.zip && rm sm2-1.0.6-apple-intel.zip +curl -L -O https://github.com/hmrc/sm2/releases/download/v1.0.7/sm2-1.0.6-apple-intel.zip && unzip sm2-1.0.6-apple-intel.zip && rm sm2-1.0.6-apple-intel.zip ``` If everything has worked you should have an executable called `sm2`. @@ -88,6 +93,13 @@ Alternatively you can start more than one service at once by typing multiple ser $ sm2 -start SERVICE_ONE SERVICE_TWO ``` +#### Starting a large group of services +Starting a large group of services can overload the cpu of a machine and lead to services failing to start. 
+If this happens use the following command to start the services at a slower pace. +```shell +$ sm2 --start LARGE_PROFILE_NAME --workers 1 --DelaySeconds 5 +``` +The workers argument starts one service at a time and the DelaySeconds argument adds a 5 second delay in between services. ### Starting specific versions If you need to run a specific version of a service you can do so by adding a colon followed by the version number to the service name, e.g. From fd0ba1a352e766df7d6fcbbbe9f9273fb8cfc459 Mon Sep 17 00:00:00 2001 From: Richard Sherman <23365258+RikSherman@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:18:25 +0100 Subject: [PATCH 4/4] BDOG-2534 make delay units explicit --- README.md | 2 +- cli/cli.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dd39aca..9190eef 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ $ sm2 -start SERVICE_ONE SERVICE_TWO Starting a large group of services can overload the cpu of a machine and lead to services failing to start. If this happens use the following command to start the services at a slower pace. ```shell -$ sm2 --start LARGE_PROFILE_NAME --workers 1 --DelaySeconds 5 +$ sm2 --start LARGE_PROFILE_NAME --workers 1 --delay-seconds 5 ``` The workers argument starts one service at a time and the DelaySeconds argument adds a 5 second delay in between services. 
diff --git a/cli/cli.go b/cli/cli.go index 6b36289..008f94d 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -184,7 +184,7 @@ func buildFlagSet(opts *UserOption) *flag.FlagSet { flagset.BoolVar(&opts.Verify, "verify", false, "for scripts, checks if a service/profile is running") flagset.IntVar(&opts.Wait, "wait", 0, "used with --start, waits a specified number of seconds for the services to become available before exiting (use with --start)") flagset.IntVar(&opts.Workers, "workers", defaultWorkers(), "how many services should be downloaded at the same time (use with --start)") - flagset.IntVar(&opts.DelaySeconds, "delay", 0, "how long to pause, in seconds, after starting a service before starting another") + flagset.IntVar(&opts.DelaySeconds, "delay-seconds", 0, "how long to pause, in seconds, after starting a service before starting another") return flagset }