diff --git a/cmd/incusd/api_cluster_evacuation.go b/cmd/incusd/api_cluster_evacuation.go
index 1663a736ca0..cf942ee73f7 100644
--- a/cmd/incusd/api_cluster_evacuation.go
+++ b/cmd/incusd/api_cluster_evacuation.go
@@ -607,20 +607,12 @@ func evacuateClusterSelectTarget(ctx context.Context, s *state.State, inst insta
     // If target member not specified yet, then find the least loaded cluster member which
     // supports the instance's architecture.
-    if targetMemberInfo == nil {
-        var err error
-
-        err = s.DB.Cluster.Transaction(ctx, func(ctx context.Context, tx *db.ClusterTx) error {
-            targetMemberInfo, err = tx.GetNodeWithLeastInstances(ctx, candidateMembers)
-            if err != nil {
-                return err
-            }
+    if targetMemberInfo == nil && len(candidateMembers) > 0 {
+        targetMemberInfo = &candidateMembers[0]
+    }
 
-            return nil
-        })
-        if err != nil {
-            return nil, nil, err
-        }
+    if targetMemberInfo == nil {
+        return nil, nil, fmt.Errorf("Couldn't find a cluster member for the instance")
     }
 
     return sourceMemberInfo, targetMemberInfo, nil
diff --git a/cmd/incusd/instance_post.go b/cmd/incusd/instance_post.go
index c8753817e73..2e74d2d4b28 100644
--- a/cmd/incusd/instance_post.go
+++ b/cmd/incusd/instance_post.go
@@ -367,13 +367,11 @@ func instancePost(d *Daemon, r *http.Request) response.Response {
             }
         }
 
-        err := s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error {
-            targetMemberInfo, err = tx.GetNodeWithLeastInstances(ctx, filteredCandidateMembers)
-            return err
-        })
-        if err != nil {
-            return response.SmartError(err)
+        if len(filteredCandidateMembers) == 0 {
+            return response.InternalError(fmt.Errorf("Couldn't find a cluster member for the instance"))
         }
+
+        targetMemberInfo = &filteredCandidateMembers[0]
     }
 
     if targetMemberInfo.IsOffline(s.GlobalConfig.OfflineThreshold()) {
diff --git a/cmd/incusd/instances_post.go b/cmd/incusd/instances_post.go
index 101a812be21..118e3d7a943 100644
--- a/cmd/incusd/instances_post.go
+++ b/cmd/incusd/instances_post.go
@@ -1130,14 +1130,12 @@ func instancesPost(d *Daemon, r *http.Request) response.Response {
         }
 
         // If no target member was selected yet, pick the member with the least number of instances.
+        if targetMemberInfo == nil && len(candidateMembers) > 0 {
+            targetMemberInfo = &candidateMembers[0]
+        }
+
         if targetMemberInfo == nil {
-            err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error {
-                targetMemberInfo, err = tx.GetNodeWithLeastInstances(ctx, candidateMembers)
-                return err
-            })
-            if err != nil {
-                return response.SmartError(err)
-            }
+            return response.InternalError(fmt.Errorf("Couldn't find a cluster member for the instance"))
         }
     }
diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index a9258dcc268..ffc523d29bc 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -2627,3 +2627,7 @@ Allow for creating new LVM cluster pools by setting the `source` to the shared b
 ## `network_ovn_external_interfaces`
 
 This adds support for `bridge.external_interfaces` on OVN networks.
+
+## `instances_scriptlet_get_instances_count`
+
+This allows the instance placement scriptlet to fetch a count of instances, optionally filtered by project and/or location, and optionally including pending instances.
diff --git a/doc/explanation/clustering.md b/doc/explanation/clustering.md
index 381ae207416..1470c168f45 100644
--- a/doc/explanation/clustering.md
+++ b/doc/explanation/clustering.md
@@ -202,6 +202,7 @@ The following functions are available to the scriptlet (in addition to those pro
 - `get_cluster_member_state(member_name)`: Get the cluster member's state. Returns an object with the cluster member's state in the form of [`api.ClusterMemberState`](https://pkg.go.dev/github.com/lxc/incus/shared/api#ClusterMemberState). `member_name` is the name of the cluster member to get the state for.
 - `get_instance_resources()`: Get information about the resources the instance will require. Returns an object with the resource information in the form of [`scriptlet.InstanceResources`](https://pkg.go.dev/github.com/lxc/incus/shared/api/scriptlet/#InstanceResources).
 - `get_instances(location, project)`: Get a list of instances based on project and/or location filters. Returns the list of instances in the form of [`[]api.Instance`](https://pkg.go.dev/github.com/lxc/incus/shared/api#Instance).
+- `get_instances_count(location, project, pending)`: Get a count of instances based on project and/or location filters. When `pending` is true, the count also includes instances that are currently being created and don't yet have a database record.
 - `get_cluster_members(group)`: Get a list of cluster members based on the cluster group. Returns the list of cluster members in the form of [`[]api.ClusterMember`](https://pkg.go.dev/github.com/lxc/incus/shared/api#ClusterMember).
 - `get_project(name)`: Get a project object based on the project name. Returns a project object in the form of [`api.Project`](https://pkg.go.dev/github.com/lxc/incus/shared/api#Project).
diff --git a/internal/server/db/instances.go b/internal/server/db/instances.go
index a78eb595aca..927b234991d 100644
--- a/internal/server/db/instances.go
+++ b/internal/server/db/instances.go
@@ -13,6 +13,7 @@ import (
 
     internalInstance "github.com/lxc/incus/v6/internal/instance"
     "github.com/lxc/incus/v6/internal/server/db/cluster"
+    "github.com/lxc/incus/v6/internal/server/db/operationtype"
     "github.com/lxc/incus/v6/internal/server/db/query"
     deviceConfig "github.com/lxc/incus/v6/internal/server/device/config"
     "github.com/lxc/incus/v6/internal/server/instance/instancetype"
@@ -1103,3 +1104,102 @@ func UpdateInstance(tx *sql.Tx, id int, description string, architecture int, ep
 
     return nil
 }
+
+// GetInstancesCount returns the number of instances, optionally filtered by project and/or location.
+// It can also include instances that are currently being created.
+func (c *ClusterTx) GetInstancesCount(ctx context.Context, projectName string, locationName string, includePending bool) (int, error) {
+    var err error
+
+    // Load the project ID if needed.
+    projectID := int64(-1)
+    if projectName != "" {
+        projectID, err = cluster.GetProjectID(ctx, c.Tx(), projectName)
+        if err != nil {
+            return -1, err
+        }
+    }
+
+    // Load the cluster member ID if needed.
+    nodeID := int64(-1)
+    if locationName != "" {
+        nodeID, err = cluster.GetNodeID(ctx, c.Tx(), locationName)
+        if err != nil {
+            return -1, err
+        }
+    }
+
+    // Count the instances.
+    var count int
+
+    if projectID != -1 && nodeID != -1 {
+        // Count for specified project and cluster member.
+        created, err := query.Count(ctx, c.tx, "instances", "project_id=? AND node_id=?", projectID, nodeID)
+        if err != nil {
+            return -1, fmt.Errorf("Failed to get instances count: %w", err)
+        }
+
+        count += created
+
+        if includePending {
+            pending, err := query.Count(ctx, c.tx, "operations", "project_id=? AND node_id=? AND type=?", projectID, nodeID, operationtype.InstanceCreate)
+            if err != nil {
+                return -1, fmt.Errorf("Failed to get pending instances count: %w", err)
+            }
+
+            count += pending
+        }
+    } else if projectID != -1 {
+        // Count for specified project.
+        created, err := query.Count(ctx, c.tx, "instances", "project_id=?", projectID)
+        if err != nil {
+            return -1, fmt.Errorf("Failed to get instances count: %w", err)
+        }
+
+        count += created
+
+        if includePending {
+            pending, err := query.Count(ctx, c.tx, "operations", "project_id=? AND type=?", projectID, operationtype.InstanceCreate)
+            if err != nil {
+                return -1, fmt.Errorf("Failed to get pending instances count: %w", err)
+            }
+
+            count += pending
+        }
+    } else if nodeID != -1 {
+        // Count for specified cluster member.
+        created, err := query.Count(ctx, c.tx, "instances", "node_id=?", nodeID)
+        if err != nil {
+            return -1, fmt.Errorf("Failed to get instances count: %w", err)
+        }
+
+        count += created
+
+        if includePending {
+            pending, err := query.Count(ctx, c.tx, "operations", "node_id=? AND type=?", nodeID, operationtype.InstanceCreate)
+            if err != nil {
+                return -1, fmt.Errorf("Failed to get pending instances count: %w", err)
+            }
+
+            count += pending
+        }
+    } else {
+        // Count everything.
+        created, err := query.Count(ctx, c.tx, "instances", "")
+        if err != nil {
+            return -1, fmt.Errorf("Failed to get instances count: %w", err)
+        }
+
+        count += created
+
+        if includePending {
+            pending, err := query.Count(ctx, c.tx, "operations", "type=?", operationtype.InstanceCreate)
+            if err != nil {
+                return -1, fmt.Errorf("Failed to get pending instances count: %w", err)
+            }
+
+            count += pending
+        }
+    }
+
+    return count, nil
+}
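For illustration, a minimal placement scriptlet using the new `get_instances_count()` function could look like the following sketch. The project name `default` and the load-balancing logic are placeholders rather than part of this change; the other helpers (`log_info()`, `set_target()`) are the existing scriptlet functions documented above.

```python
def instance_placement(request, candidate_members):
    # Overall instance count, including instances that are still being created.
    total = get_instances_count(pending=True)
    log_info("cluster has ", total, " instances (including pending)")

    # Pick the candidate with the fewest instances in the "default" project,
    # counting pending creations as well to avoid racing concurrent placements.
    best = None
    best_count = 0
    for member in candidate_members:
        count = get_instances_count(project="default", location=member.server_name, pending=True)
        if best == None or count < best_count:
            best = member
            best_count = count

    if best == None:
        return "No candidate members available"

    set_target(best.server_name)
    return # No error.
```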
diff --git a/internal/server/db/node.go b/internal/server/db/node.go
index 81b7f3c9343..bf81482e685 100644
--- a/internal/server/db/node.go
+++ b/internal/server/db/node.go
@@ -8,12 +8,12 @@ import (
     "fmt"
     "net/http"
     "slices"
+    "sort"
     "strconv"
     "strings"
     "time"
 
     "github.com/lxc/incus/v6/internal/server/db/cluster"
-    "github.com/lxc/incus/v6/internal/server/db/operationtype"
     "github.com/lxc/incus/v6/internal/server/db/query"
     localUtil "github.com/lxc/incus/v6/internal/server/util"
     "github.com/lxc/incus/v6/internal/version"
@@ -1135,40 +1135,14 @@ func (c *ClusterTx) GetCandidateMembers(ctx context.Context, allMembers []NodeIn
         }
     }
 
-    return candidateMembers, nil
-}
-
-// GetNodeWithLeastInstances returns the name of the member with the least number of instances that are either
-// already created or being created with an operation.
-func (c *ClusterTx) GetNodeWithLeastInstances(ctx context.Context, members []NodeInfo) (*NodeInfo, error) {
-    var member *NodeInfo
-    var lowestInstanceCount = -1
-
-    for i := range members {
-        // Fetch the number of instances already created on this member.
-        created, err := query.Count(ctx, c.tx, "instances", "node_id=?", members[i].ID)
-        if err != nil {
-            return nil, fmt.Errorf("Failed to get instances count: %w", err)
-        }
+    sort.Slice(candidateMembers, func(i int, j int) bool {
+        iCount, _ := c.GetInstancesCount(ctx, "", candidateMembers[i].Name, true)
+        jCount, _ := c.GetInstancesCount(ctx, "", candidateMembers[j].Name, true)
 
-        // Fetch the number of instances currently being created on this member.
-        pending, err := query.Count(ctx, c.tx, "operations", "node_id=? AND type=?", members[i].ID, operationtype.InstanceCreate)
-        if err != nil {
-            return nil, fmt.Errorf("Failed to get pending instances count: %w", err)
-        }
-
-        memberInstanceCount := created + pending
-        if lowestInstanceCount == -1 || memberInstanceCount < lowestInstanceCount {
-            lowestInstanceCount = memberInstanceCount
-            member = &members[i]
-        }
-    }
-
-    if member == nil {
-        return nil, api.StatusErrorf(http.StatusNotFound, "No suitable cluster member could be found")
-    }
+        return iCount < jCount
+    })
 
-    return member, nil
+    return candidateMembers, nil
 }
 
 // SetNodeVersion updates the schema and API version of the node with the
diff --git a/internal/server/db/node_test.go b/internal/server/db/node_test.go
index 68940f2953b..a66d5150a32 100644
--- a/internal/server/db/node_test.go
+++ b/internal/server/db/node_test.go
@@ -305,7 +305,7 @@ INSERT INTO storage_volumes(name, storage_pool_id, node_id, type, project_id, de
 
 // If there are 2 online nodes, return the address of the one with the least
 // number of instances.
-func TestGetNodeWithLeastInstances(t *testing.T) {
+func TestGetCandidateMembers(t *testing.T) {
     tx, cleanup := db.NewTestClusterTx(t)
     defer cleanup()
 
@@ -325,14 +325,12 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id, descri
     require.NoError(t, err)
     require.Len(t, members, 2)
 
-    member, err := tx.GetNodeWithLeastInstances(context.Background(), members)
-    require.NoError(t, err)
-    assert.Equal(t, "buzz", member.Name)
+    assert.Equal(t, "buzz", members[0].Name)
 }
 
 // If there are nodes, and one of them is offline, return the name of the
 // online node, even if the offline one has more instances.
-func TestGetNodeWithLeastInstances_OfflineNode(t *testing.T) {
+func TestGetCandidateMembers_OfflineNode(t *testing.T) {
     tx, cleanup := db.NewTestClusterTx(t)
     defer cleanup()
 
@@ -356,14 +354,12 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id, descri
     require.NoError(t, err)
     require.Len(t, members, 1)
 
-    member, err := tx.GetNodeWithLeastInstances(context.Background(), members)
-    require.NoError(t, err)
-    assert.Equal(t, "buzz", member.Name)
+    assert.Equal(t, "buzz", members[0].Name)
 }
 
 // If there are 2 online nodes, and an instance is pending on one of them,
 // return the address of the other one number of instances.
-func TestGetNodeWithLeastInstances_Pending(t *testing.T) {
+func TestGetCandidateMembers_Pending(t *testing.T) {
     tx, cleanup := db.NewTestClusterTx(t)
     defer cleanup()
 
@@ -383,14 +379,12 @@ INSERT INTO operations (id, uuid, node_id, type, project_id) VALUES (1, 'abc', 1
     require.NoError(t, err)
     require.Len(t, members, 2)
 
-    member, err := tx.GetNodeWithLeastInstances(context.Background(), members)
-    require.NoError(t, err)
-    assert.Equal(t, "buzz", member.Name)
+    assert.Equal(t, "buzz", members[0].Name)
 }
 
 // If specific architectures were selected, return only nodes with those
 // architectures.
-func TestGetNodeWithLeastInstances_Architecture(t *testing.T) {
+func TestGetCandidateMembers_Architecture(t *testing.T) {
     tx, cleanup := db.NewTestClusterTx(t)
     defer cleanup()
 
@@ -419,9 +413,7 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id, descri
     require.Len(t, members, 1)
 
     // The local member is returned despite it has more instances.
-    member, err := tx.GetNodeWithLeastInstances(context.Background(), members)
-    require.NoError(t, err)
-    assert.Equal(t, "none", member.Name)
+    assert.Equal(t, "none", members[0].Name)
 }
 
 func TestUpdateNodeFailureDomain(t *testing.T) {
@@ -452,7 +444,7 @@ func TestUpdateNodeFailureDomain(t *testing.T) {
     assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 0}, domains)
 }
 
-func TestGetNodeWithLeastInstances_DefaultArch(t *testing.T) {
+func TestGetCandidateMembers_DefaultArch(t *testing.T) {
     tx, cleanup := db.NewTestClusterTx(t)
     defer cleanup()
 
@@ -480,7 +472,5 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id, descri
     require.NoError(t, err)
     require.Len(t, members, 1)
 
-    member, err := tx.GetNodeWithLeastInstances(context.Background(), members)
-    require.NoError(t, err)
-    assert.Equal(t, "buzz", member.Name)
+    assert.Equal(t, "buzz", members[0].Name)
 }
diff --git a/internal/server/scriptlet/instance_placement.go b/internal/server/scriptlet/instance_placement.go
index bc2ba27fdbf..24ac7179005 100644
--- a/internal/server/scriptlet/instance_placement.go
+++ b/internal/server/scriptlet/instance_placement.go
@@ -99,7 +99,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
 
         rv, err := StarlarkMarshal(res)
         if err != nil {
-            return nil, fmt.Errorf("Marshalling member resources for %q failed: %w", memberName, err)
+            return nil, fmt.Errorf("Marshalling cluster member resources for %q failed: %w", memberName, err)
         }
 
         return rv, nil
@@ -148,7 +148,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
 
         rv, err := StarlarkMarshal(memberState)
         if err != nil {
-            return nil, fmt.Errorf("Marshalling member state for %q failed: %w", memberName, err)
+            return nil, fmt.Errorf("Marshalling cluster member state for %q failed: %w", memberName, err)
         }
 
         return rv, nil
@@ -283,7 +283,35 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
 
         rv, err := StarlarkMarshal(instanceList)
         if err != nil {
-            return nil, fmt.Errorf("Marshalling instance resources failed: %w", err)
+            return nil, fmt.Errorf("Marshalling instances failed: %w", err)
+        }
+
+        return rv, nil
+    }
+
+    getInstancesCountFunc := func(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+        var projectName string
+        var locationName string
+        var includePending bool
+
+        err := starlark.UnpackArgs(b.Name(), args, kwargs, "project??", &projectName, "location??", &locationName, "pending??", &includePending)
+        if err != nil {
+            return nil, err
+        }
+
+        var count int
+
+        err = s.DB.Cluster.Transaction(ctx, func(ctx context.Context, tx *db.ClusterTx) error {
+            count, err = tx.GetInstancesCount(ctx, projectName, locationName, includePending)
+            return err
+        })
+        if err != nil {
+            return nil, err
+        }
+
+        rv, err := StarlarkMarshal(count)
+        if err != nil {
+            return nil, fmt.Errorf("Marshalling instance count failed: %w", err)
         }
 
         return rv, nil
@@ -371,7 +399,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
 
         rv, err := StarlarkMarshal(allMembersInfo)
         if err != nil {
-            return nil, fmt.Errorf("Marshalling instance resources failed: %w", err)
+            return nil, fmt.Errorf("Marshalling cluster members failed: %w", err)
         }
 
         return rv, nil
@@ -406,7 +434,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
 
         rv, err := StarlarkMarshal(p)
         if err != nil {
-            return nil, fmt.Errorf("Marshalling instance resources failed: %w", err)
+            return nil, fmt.Errorf("Marshalling project failed: %w", err)
         }
 
         return rv, nil
@@ -478,6 +506,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State,
         "get_cluster_member_state": starlark.NewBuiltin("get_cluster_member_state", getClusterMemberStateFunc),
         "get_instance_resources":   starlark.NewBuiltin("get_instance_resources", getInstanceResourcesFunc),
         "get_instances":            starlark.NewBuiltin("get_instances", getInstancesFunc),
+        "get_instances_count":      starlark.NewBuiltin("get_instances_count", getInstancesCountFunc),
         "get_cluster_members":      starlark.NewBuiltin("get_cluster_members", getClusterMembersFunc),
         "get_project":              starlark.NewBuiltin("get_project", getProjectFunc),
     }
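Since the builtin above unpacks `project`, `location` and `pending` as optional arguments, a scriptlet can supply any subset of them; using keyword arguments keeps the calls unambiguous. A few illustrative calls (the project and member names are placeholders):

```python
everything = get_instances_count()
in_project = get_instances_count(project="default")
on_member = get_instances_count(location="server01", pending=True)
```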
nil, fmt.Errorf("Marshalling project failed: %w", err) } return rv, nil @@ -478,6 +506,7 @@ func InstancePlacementRun(ctx context.Context, l logger.Logger, s *state.State, "get_cluster_member_state": starlark.NewBuiltin("get_cluster_member_state", getClusterMemberStateFunc), "get_instance_resources": starlark.NewBuiltin("get_instance_resources", getInstanceResourcesFunc), "get_instances": starlark.NewBuiltin("get_instances", getInstancesFunc), + "get_instances_count": starlark.NewBuiltin("get_instances_count", getInstancesCountFunc), "get_cluster_members": starlark.NewBuiltin("get_cluster_members", getClusterMembersFunc), "get_project": starlark.NewBuiltin("get_project", getProjectFunc), } diff --git a/internal/server/scriptlet/load/load.go b/internal/server/scriptlet/load/load.go index 2dbb9af0d8c..b121787d5db 100644 --- a/internal/server/scriptlet/load/load.go +++ b/internal/server/scriptlet/load/load.go @@ -78,6 +78,7 @@ func InstancePlacementCompile(name string, src string) (*starlark.Program, error "get_cluster_member_state", "get_instance_resources", "get_instances", + "get_instances_count", "get_cluster_members", "get_project", }) diff --git a/internal/version/api.go b/internal/version/api.go index 1b2cc5e3cbe..798c014bac4 100644 --- a/internal/version/api.go +++ b/internal/version/api.go @@ -447,6 +447,7 @@ var APIExtensions = []string{ "instance_nic_macvlan_mode", "storage_lvm_cluster_create", "network_ovn_external_interfaces", + "instances_scriptlet_get_instances_count", } // APIExtensionsCount returns the number of available API extensions. diff --git a/test/suites/clustering_instance_placement_scriptlet.sh b/test/suites/clustering_instance_placement_scriptlet.sh index a6a15770d9c..e9d74bfdca9 100644 --- a/test/suites/clustering_instance_placement_scriptlet.sh +++ b/test/suites/clustering_instance_placement_scriptlet.sh @@ -74,8 +74,9 @@ def instance_placement(request, candidate_members): log_info("instance placement member resources: ", get_cluster_member_resources(member.server_name)) log_info("instance placement member state: ", get_cluster_member_state(member.server_name)) - # Set statically target to 2nd member. - set_target(candidate_members[1].server_name) + # Set statically target to 2nd member (alphabetical). + candidate_names = sorted([candidate.server_name for candidate in candidate_members]) + set_target(candidate_names[1]) return # No error. EOF @@ -151,7 +152,8 @@ def instance_placement(request, candidate_members): # Set statically target to 3rd member. # Note: We expect the candidate members to not contain the member being evacuated, and thus the 3rd # member is the 2nd entry in the candidate_members list now. - set_target(candidate_members[1].server_name) + candidate_names = sorted([candidate.server_name for candidate in candidate_members]) + set_target(candidate_names[1]) return # No error. EOF diff --git a/test/suites/clustering_move.sh b/test/suites/clustering_move.sh index 39e778edb80..1556cddbc4f 100644 --- a/test/suites/clustering_move.sh +++ b/test/suites/clustering_move.sh @@ -111,7 +111,8 @@ def instance_placement(request, candidate_members): return "Expecting reason relocation" # Set statically target to 1st member. - set_target(candidate_members[0].server_name) + candidate_names = sorted([candidate.server_name for candidate in candidate_members]) + set_target(candidate_names[0]) return EOF