
Commit

skip offline nodes, let user specify number of executors per node (#1)
* skip offline nodes, let user specify number of executors per node
Shareed2k authored May 19, 2022
1 parent 4124385 commit 477ba5e
Showing 6 changed files with 128 additions and 37 deletions.
6 changes: 6 additions & 0 deletions pkg/config/config.go
@@ -91,6 +91,12 @@ var (
Default: 1,
NoPrefix: true,
},
{
Name: "node_num_executors",
Help: "number of executors per node",
Default: 1,
NoPrefix: true,
},
{
Name: "min_nodes_during_working_hours",
Help: "the minimum nodes to keep while inside working hours",
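For context on how the new option is consumed: the client change below divides busy executors by the configured capacity (number of nodes × node_num_executors) instead of Jenkins' reported TotalExecutors. A minimal, runnable sketch of that calculation with made-up numbers (busyExecutors, numNodes, and nodeNumExecutors are illustrative stand-ins, not the project's variable names):

package main

import "fmt"

func main() {
	// Stand-in values; in the scaler these come from the Jenkins computers
	// API (BusyExecutors) and the new node_num_executors option.
	busyExecutors := int64(14)
	numNodes := int64(5)
	nodeNumExecutors := int64(4)

	// Usage as a percentage of configured capacity, mirroring the updated
	// formula in pkg/scaler/client.go.
	currentUsage := int64((float64(busyExecutors) / float64(numNodes*nodeNumExecutors)) * 100)
	fmt.Printf("usage: %d%%\n", currentUsage) // prints: usage: 70%
}
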
43 changes: 32 additions & 11 deletions pkg/scaler/client.go
@@ -30,42 +30,63 @@ func NewClient(opt *Options) *WrapperClient {
}

// GetCurrentUsage return the current usage of jenkins nodes.
func (c *WrapperClient) GetCurrentUsage(ctx context.Context) (int64, error) {
func (c *WrapperClient) GetCurrentUsage(ctx context.Context, numNodes int64) (int64, error) {
computers, err := c.computers(ctx)
if err != nil {
return 0, err
}

currentUsage := int64((float64(computers.BusyExecutors) / float64(computers.TotalExecutors)) * 100)
currentUsage := int64((float64(computers.BusyExecutors) / float64(numNodes*c.opt.NodeNumExecutors)) * 100)

return currentUsage, nil
}

func (c *WrapperClient) GetAllNodes(ctx context.Context, withBuildInNode bool) (Nodes, error) {
func (c *WrapperClient) GetAllNodes(ctx context.Context) (Nodes, error) {
computers, err := c.computers(ctx)
if err != nil {
return nil, err
}

nodes := make(Nodes, len(computers.Computers))
for _, node := range computers.Computers {
// skip master node
if !withBuildInNode && c.opt.ControllerNodeName == node.DisplayName {
nodes[node.DisplayName] = &gojenkins.Node{Jenkins: c.Jenkins, Raw: node, Base: "/computer/" + node.DisplayName}
}

return nodes, nil
}

func (n Nodes) IsExist(name string) (*gojenkins.Node, bool) {
if node, ok := n[name]; ok {
return node, true
}

return nil, false
}

func (n Nodes) ExcludeOffline() Nodes {
nodes := make(Nodes, 0)
for name, node := range n {
if node.Raw.Offline == true {
continue
}

nodes[node.DisplayName] = &gojenkins.Node{Jenkins: c.Jenkins, Raw: node, Base: "/computer/" + node.DisplayName}
nodes[name] = node
}

return nodes, nil
return nodes
}

func (n Nodes) IsExist(name string) bool {
if _, ok := n[name]; ok {
return true
func (n Nodes) ExcludeNode(name string) Nodes {
nodes := make(Nodes, 0)
for i, node := range n {
if name == node.Raw.DisplayName {
continue
}

nodes[i] = node
}

return false
return nodes
}

func (c *WrapperClient) computers(ctx context.Context) (*gojenkins.Computers, error) {
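The net effect of the client.go changes is that GetAllNodes now returns every computer, and callers filter the result with the new ExcludeNode/ExcludeOffline helpers before sizing usage against what remains. A standalone sketch of those filters using simplified stand-in types (the real code uses gojenkins.Node and its Raw NodeResponse; the lowercase types and node names below are illustrative only):

package main

import "fmt"

// Simplified stand-ins for the gojenkins node types; only the fields used by
// the new filters are modeled here.
type nodeResponse struct {
	DisplayName string
	Offline     bool
}

type node struct{ Raw *nodeResponse }

type nodeMap map[string]*node

// excludeOffline mirrors Nodes.ExcludeOffline: drop nodes Jenkins marks offline.
func (n nodeMap) excludeOffline() nodeMap {
	out := make(nodeMap)
	for name, nd := range n {
		if nd.Raw.Offline {
			continue
		}
		out[name] = nd
	}
	return out
}

// excludeNode mirrors Nodes.ExcludeNode: drop a node by display name,
// e.g. the Jenkins controller node.
func (n nodeMap) excludeNode(name string) nodeMap {
	out := make(nodeMap)
	for key, nd := range n {
		if name == nd.Raw.DisplayName {
			continue
		}
		out[key] = nd
	}
	return out
}

func main() {
	all := nodeMap{
		"built-in": {Raw: &nodeResponse{DisplayName: "built-in"}},
		"agent-1":  {Raw: &nodeResponse{DisplayName: "agent-1"}},
		"agent-2":  {Raw: &nodeResponse{DisplayName: "agent-2", Offline: true}},
	}

	// Mirrors the new call pattern in Do()/gc(): drop the controller node,
	// then drop anything reported offline.
	usable := all.excludeNode("built-in").excludeOffline()
	fmt.Println(len(usable)) // 1
}
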
29 changes: 21 additions & 8 deletions pkg/scaler/scaler.go
@@ -22,9 +22,9 @@ import (

type (
Jenkinser interface {
GetCurrentUsage(ctx context.Context) (int64, error)
GetCurrentUsage(ctx context.Context, numNodes int64) (int64, error)
DeleteNode(ctx context.Context, name string) (bool, error)
GetAllNodes(ctx context.Context, withMaster bool) (Nodes, error)
GetAllNodes(ctx context.Context) (Nodes, error)
GetNode(ctx context.Context, name string) (*gojenkins.Node, error)
}

@@ -60,6 +60,7 @@ type (
JenkinsToken string `config:"jenkins_token" validate:"required"`
ControllerNodeName string `config:"controller_node_name"`
WorkingHoursCronExpressions string `config:"working_hours_cron_expressions"`
NodeNumExecutors int64 `config:"node_num_executors"`
MaxNodes int64 `config:"max_nodes"`
MinNodesInWorkingHours int64 `config:"min_nodes_during_working_hours"`
ScaleUpThreshold int64 `config:"scale_up_threshold"`
@@ -170,16 +171,20 @@ func (s *Scaler) Do(ctx context.Context) {

logger := s.logger

usage, err := s.client.GetCurrentUsage(ctx)
nodes, err := s.client.GetAllNodes(s.ctx)
if err != nil {
logger.Errorf("failed getting current usage: %v", err)
logger.Error(err)

return
}

nodes, err := s.client.GetAllNodes(s.ctx, false)
nodes = nodes.
ExcludeNode(s.opt.ControllerNodeName).
ExcludeOffline()

usage, err := s.client.GetCurrentUsage(ctx, int64(len(nodes)))
if err != nil {
logger.Error(err)
logger.Errorf("failed getting current usage: %v", err)

return
}
@@ -255,6 +260,10 @@ func (s *Scaler) scaleUp(usage int64) error {
newSize = maxSize
}

if newSize == curSize {
return nil
}

logger.Infof("will spin up %d extra nodes", newSize-curSize)
logger.Debugf("new target size: %d", newSize)

@@ -419,11 +428,15 @@ func (s *Scaler) gc(ctx context.Context) error {
logger := s.logger
logger.Debug("running GC...")

nodes, err := s.client.GetAllNodes(ctx, false)
nodes, err := s.client.GetAllNodes(ctx)
if err != nil {
return err
}

nodes = nodes.
ExcludeNode(s.opt.ControllerNodeName).
ExcludeOffline()

instances, err := s.backend.Instances()
if err != nil {
return err
@@ -434,7 +447,7 @@
for _, instance := range instances {
// verify that each instance is being seen by Jenkins
name := instance.Name()
if nodes.IsExist(name) {
if node, ok := nodes.IsExist(name); ok && !node.Raw.Offline {
continue
}

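The GC change pairs with the new IsExist signature: an instance is kept only if Jenkins knows the node and the node is online, so offline-but-registered nodes are treated as zombies as well. A small self-contained sketch of that check (types, method casing, and agent names are illustrative stand-ins, not the project's exports):

package main

import "fmt"

// Minimal stand-ins for the Jenkins node map consulted by gc().
type nodeResponse struct {
	DisplayName string
	Offline     bool
}

type node struct{ Raw *nodeResponse }

type nodeMap map[string]*node

// isExist mirrors the updated Nodes.IsExist: it returns the node itself so
// the caller can also inspect Raw.Offline, not just a boolean.
func (n nodeMap) isExist(name string) (*node, bool) {
	nd, ok := n[name]
	return nd, ok
}

func main() {
	jenkinsNodes := nodeMap{
		"agent-1": {Raw: &nodeResponse{DisplayName: "agent-1"}},
		"agent-2": {Raw: &nodeResponse{DisplayName: "agent-2", Offline: true}},
	}

	// Instance names the cloud backend reports as running (hypothetical).
	backendInstances := []string{"agent-1", "agent-2", "agent-3"}

	for _, name := range backendInstances {
		// Keep the instance only if Jenkins sees it and it is online,
		// matching the new check in gc(); everything else is a zombie.
		if nd, ok := jenkinsNodes.isExist(name); ok && !nd.Raw.Offline {
			continue
		}
		fmt.Println("terminate zombie:", name) // agent-2, agent-3
	}
}
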
25 changes: 24 additions & 1 deletion pkg/scaler/scaler_suite_test.go
@@ -18,6 +18,8 @@ type (
name string
launchTime *time.Time
}

NodeOption func(c *gojenkins.Node)
)

func (i fakeInstance) Describe() error {
@@ -32,16 +32,37 @@ func (i fakeInstance) LaunchTime() *time.Time {
return i.launchTime
}

func makeFakeNodes(size int) scaler.Nodes {
func mergeFakeTypes[M ~map[K]V, K string, V any](dst, src M) M {
i := len(dst) - 1
for _, v := range src {
i++
dst[K(fmt.Sprintf("%d", i))] = v
}

return dst
}

func WithOffline() NodeOption {
return func(n *gojenkins.Node) {
n.Raw.Offline = true
}
}

func makeFakeNodes(size int, opts ...NodeOption) scaler.Nodes {
nodes := make(scaler.Nodes, size)
for i := 0; i < size; i++ {
name := fmt.Sprintf("%d", i)

nodes[name] = &gojenkins.Node{
Raw: &gojenkins.NodeResponse{
DisplayName: name,
Idle: true,
},
}

for _, opt := range opts {
opt(nodes[name])
}
}

return nodes
46 changes: 37 additions & 9 deletions pkg/scaler/scaler_test.go
@@ -69,7 +69,7 @@ var _ = Describe("Scaler", func() {

Describe("GC", func() {
It("clear zombies", func() {
client.EXPECT().GetAllNodes(gomock.Any(), false).Return(makeFakeNodes(3), nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any()).Return(makeFakeNodes(3), nil).Times(1)
// provider will decrease instances to 3
bk.EXPECT().Instances().Return(makeFakeInstances(5), nil).Times(1)
bk.EXPECT().Terminate(gomock.Any()).DoAndReturn(func(ins backend.Instances) error {
@@ -80,14 +80,28 @@

scal.GC(ctx)
})

It("clear also if node is exist but in offline", func() {
nodes := mergeFakeTypes(makeFakeNodes(2, WithOffline()), makeFakeNodes(4))

// the offline filter should already have excluded offline jenkins nodes from the result, but we keep an extra check to be sure
client.EXPECT().GetAllNodes(gomock.Any()).Return(nodes, nil).Times(1)
bk.EXPECT().Instances().Return(makeFakeInstances(6), nil).Times(1)
bk.EXPECT().Terminate(gomock.Any()).DoAndReturn(func(ins backend.Instances) error {
Expect(ins).To(HaveLen(2))
return nil
}).Times(1)

scal.GC(ctx)
})
})

Describe("Do", func() {
Context("scaleUp", func() {
It("run provider with min nodes and havy usage, will scale out provider to 1 more", func() {
// 77% usage, and default scale up threshold is 70%
client.EXPECT().GetCurrentUsage(gomock.Any()).Return(int64(77), nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any(), false).Return(makeFakeNodes(3), nil).Times(1)
client.EXPECT().GetCurrentUsage(gomock.Any(), gomock.Any()).Return(int64(77), nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any()).Return(makeFakeNodes(3), nil).Times(1)
// cloud backend have 2 running instances, this is default min value
bk.EXPECT().CurrentSize().Return(int64(3), nil).Times(1)
// provider will use new value of 3
@@ -99,10 +113,10 @@
Context("scaleDown", func() {
It("run provider with 5 nodes and low usage, will scale in provider, decrease from 5 to 4", func() {
// 28% usage, and default scale down threshold is 30%
client.EXPECT().GetCurrentUsage(gomock.Any()).Return(int64(28), nil).Times(1)
client.EXPECT().GetCurrentUsage(gomock.Any(), gomock.Any()).Return(int64(28), nil).Times(1)
// will return current provider running nodes
nodes := makeFakeNodes(5)
client.EXPECT().GetAllNodes(gomock.Any(), false).Return(nodes, nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any()).Return(nodes, nil).Times(1)
client.EXPECT().GetNode(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, name string) (*gojenkins.Node, error) {
return nodes[name], nil
}).Times(1)
@@ -117,9 +131,9 @@
Context("scaleToMinimum", func() {
It("run provider with 0 nodes, will scale out provider to default values", func() {
// no usage, cluster is empty of jobs, starting the day
client.EXPECT().GetCurrentUsage(gomock.Any()).Return(int64(98), nil).Times(1)
client.EXPECT().GetCurrentUsage(gomock.Any(), gomock.Any()).Return(int64(98), nil).Times(1)
// no running nodes
client.EXPECT().GetAllNodes(gomock.Any(), false).Return(make(scaler.Nodes), nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any()).Return(make(scaler.Nodes), nil).Times(1)
// provider will use default value of 2
bk.EXPECT().Resize(int64(2)).Return(nil).Times(1)

@@ -133,14 +147,28 @@
Expect(err).To(Not(HaveOccurred()))

// no usage, cluster is empty of jobs, starting the day
client.EXPECT().GetCurrentUsage(gomock.Any()).Return(int64(0), nil).Times(1)
client.EXPECT().GetCurrentUsage(gomock.Any(), gomock.Any()).Return(int64(0), nil).Times(1)
// no running nodes
client.EXPECT().GetAllNodes(gomock.Any(), false).Return(make(scaler.Nodes), nil).Times(1)
client.EXPECT().GetAllNodes(gomock.Any()).Return(make(scaler.Nodes), nil).Times(1)
// provider will use default value of 1
bk.EXPECT().Resize(int64(1)).Return(nil).Times(1)

scal.Do(ctx)
})

It("run provider with 0 nodes, in working hours", func() {
scal, err = scaler.NewWithClient(cfg, bk, client, logger, metrics)
Expect(err).To(Not(HaveOccurred()))

// no usage, cluster is empty of jobs, starting the day
client.EXPECT().GetCurrentUsage(gomock.Any(), gomock.Any()).Return(int64(0), nil).Times(1)
// no running nodes
client.EXPECT().GetAllNodes(gomock.Any()).Return(make(scaler.Nodes), nil).Times(1)
// provider will use default value of 2
bk.EXPECT().Resize(int64(2)).Return(nil).Times(1)

scal.Do(ctx)
})
})
})
})
16 changes: 8 additions & 8 deletions pkg/testing/mocks/scaler/client_mock.go

Some generated files are not rendered by default.
