From 191663e5bcea29e042dd4d2ac02239609286fd40 Mon Sep 17 00:00:00 2001 From: Sherif Akoush Date: Thu, 28 Apr 2022 18:09:31 +0100 Subject: [PATCH] Update memory.go (#173) always reschedule models from disconnecting agents. --- scheduler/pkg/store/memory.go | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/scheduler/pkg/store/memory.go b/scheduler/pkg/store/memory.go index ce5fdb7d4e..428aa1031a 100644 --- a/scheduler/pkg/store/memory.go +++ b/scheduler/pkg/store/memory.go @@ -494,29 +494,24 @@ func (m *MemoryStore) RemoveServerReplica(serverName string, replicaIdx int) ([] } delete(server.replicas, replicaIdx) //TODO we should not reschedule models on servers with dedicated models, e.g. non shareable servers - if server.expectedReplicas < replicaIdx+1 { // this is a real deletion of a server replica - if len(server.replicas) == 0 { - delete(m.store.servers, serverName) - } - var modelNames []string - // Find models to reschedule due to this server replica being removed - for modelVersionID := range serverReplica.loadedModels { - model, ok := m.store.models[modelVersionID.Name] - if ok { - modelVersion := model.GetVersion(modelVersionID.Version) - if modelVersion != nil { - delete(modelVersion.replicas, replicaIdx) - modelNames = append(modelNames, modelVersionID.Name) - } else { - logger.Warnf("Can't find model version %s", modelVersionID.String()) - } + if len(server.replicas) == 0 { + delete(m.store.servers, serverName) + } + var modelNames []string + // Find models to reschedule due to this server replica being removed + for modelVersionID := range serverReplica.loadedModels { + model, ok := m.store.models[modelVersionID.Name] + if ok { + modelVersion := model.GetVersion(modelVersionID.Version) + if modelVersion != nil { + delete(modelVersion.replicas, replicaIdx) + modelNames = append(modelNames, modelVersionID.Name) + } else { + logger.Warnf("Can't find model version %s", modelVersionID.String()) } } - return modelNames, nil - } else { - return []string{}, nil } - + return modelNames, nil } func (m *MemoryStore) ServerNotify(request *pb.ServerNotifyRequest) error {