Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: new listen strategy with worker-side heartbeats #308

Merged
merged 3 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion api-contracts/dispatcher/dispatcher.proto
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ service Dispatcher {

rpc Listen(WorkerListenRequest) returns (stream AssignedAction) {}

// ListenV2 is like listen, but implementation does not include heartbeats. This should only used by SDKs
// against engine version v0.18.1+
rpc ListenV2(WorkerListenRequest) returns (stream AssignedAction) {}

// Heartbeat is a method for workers to send heartbeats to the dispatcher
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse) {}

rpc SubscribeToWorkflowEvents(SubscribeToWorkflowEventsRequest) returns (stream WorkflowEvent) {}

rpc SendStepActionEvent(StepActionEvent) returns (ActionEventResponse) {}
Expand Down Expand Up @@ -238,4 +245,14 @@ message OverridesData {
string callerFilename = 4;
}

message OverridesDataResponse {}
message OverridesDataResponse {}

message HeartbeatRequest {
// the id of the worker
string workerId = 1;

// heartbeatAt is the time the worker sent the heartbeat
google.protobuf.Timestamp heartbeatAt = 2;
}

message HeartbeatResponse {}
295 changes: 218 additions & 77 deletions internal/services/dispatcher/contracts/dispatcher.pb.go

Large diffs are not rendered by default.

107 changes: 106 additions & 1 deletion internal/services/dispatcher/contracts/dispatcher_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 94 additions & 0 deletions internal/services/dispatcher/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,100 @@ func (s *DispatcherImpl) Listen(request *contracts.WorkerListenRequest, stream c
}
}

// ListenV2 is like Listen, but implementation does not include heartbeats. This should only used by SDKs
// against engine version v0.18.1+
func (s *DispatcherImpl) ListenV2(request *contracts.WorkerListenRequest, stream contracts.Dispatcher_ListenV2Server) error {
tenant := stream.Context().Value("tenant").(*dbsqlc.Tenant)
tenantId := sqlchelpers.UUIDToStr(tenant.ID)

s.l.Debug().Msgf("Received subscribe request from ID: %s", request.WorkerId)

worker, err := s.repo.Worker().GetWorkerForEngine(tenantId, request.WorkerId)

if err != nil {
s.l.Error().Err(err).Msgf("could not get worker %s", request.WorkerId)
return err
}

// check the worker's dispatcher against the current dispatcher. if they don't match, then update the worker
if worker.DispatcherId.Valid && sqlchelpers.UUIDToStr(worker.DispatcherId) != s.dispatcherId {
_, err = s.repo.Worker().UpdateWorker(tenantId, request.WorkerId, &repository.UpdateWorkerOpts{
DispatcherId: &s.dispatcherId,
})

if err != nil {
s.l.Error().Err(err).Msgf("could not update worker %s dispatcher", request.WorkerId)
return err
}
}

fin := make(chan bool)

s.workers.Store(request.WorkerId, subscribedWorker{stream: stream, finished: fin})

defer func() {
// non-blocking send
select {
case fin <- true:
default:
}

s.workers.Delete(request.WorkerId)

inactive := db.WorkerStatusInactive

_, err := s.repo.Worker().UpdateWorker(tenantId, request.WorkerId, &repository.UpdateWorkerOpts{
Status: &inactive,
})

if err != nil {
s.l.Error().Err(err).Msgf("could not update worker %s status to inactive", request.WorkerId)
}
}()

ctx := stream.Context()

// Keep the connection alive for sending messages
for {
select {
case <-fin:
s.l.Debug().Msgf("closing stream for worker id: %s", request.WorkerId)
return nil
case <-ctx.Done():
s.l.Debug().Msgf("worker id %s has disconnected", request.WorkerId)
return nil
}
}
}

const HeartbeatInterval = 4 * time.Second

// Heartbeat is used to update the last heartbeat time for a worker
func (s *DispatcherImpl) Heartbeat(ctx context.Context, req *contracts.HeartbeatRequest) (*contracts.HeartbeatResponse, error) {
tenant := ctx.Value("tenant").(*dbsqlc.Tenant)
tenantId := sqlchelpers.UUIDToStr(tenant.ID)

heartbeatAt := time.Now().UTC()

s.l.Debug().Msgf("Received heartbeat request from ID: %s", req.WorkerId)

// if heartbeat time is greater than expected heartbeat interval, show a warning
if req.HeartbeatAt.AsTime().Before(heartbeatAt.Add(-1 * HeartbeatInterval)) {
s.l.Warn().Msgf("heartbeat time is greater than expected heartbeat interval")
}

_, err := s.repo.Worker().UpdateWorker(tenantId, req.WorkerId, &repository.UpdateWorkerOpts{
// use the system time for heartbeat
LastHeartbeatAt: &heartbeatAt,
})

if err != nil {
return nil, err
}

return &contracts.HeartbeatResponse{}, nil
}

// SubscribeToWorkflowEvents registers workflow events with the dispatcher
func (s *DispatcherImpl) SubscribeToWorkflowEvents(request *contracts.SubscribeToWorkflowEventsRequest, stream contracts.Dispatcher_SubscribeToWorkflowEventsServer) error {
tenant := stream.Context().Value("tenant").(*dbsqlc.Tenant)
Expand Down
Loading
Loading