Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

quorum-based query context #134

Open
wants to merge 2 commits into
base: db_main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1611,6 +1611,7 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, seriesSrv storepb.Store
false,
s.metrics.emptyPostingCount.WithLabelValues(tenant),
nil,
nil,
)
} else {
resp = newLazyRespSet(
Expand Down
36 changes: 34 additions & 2 deletions pkg/store/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,15 @@ func (s *ProxyStore) TSDBInfos() []infopb.TSDBInfo {
return infos
}

type quorumGroup struct {
quorumGroupKey string
context context.Context
cancel context.CancelFunc
quorumValue int64
quorumCounter *int64
replicas int
}

func (s *ProxyStore) Series(originalRequest *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error {
// TODO(bwplotka): This should be part of request logger, otherwise it does not make much sense. Also, could be
// triggered by tracing span to reduce cognitive load.
Expand Down Expand Up @@ -385,10 +394,32 @@ func (s *ProxyStore) Series(originalRequest *storepb.SeriesRequest, srv storepb.
}
defer logGroupReplicaErrors()

level.Debug(reqLogger).Log("s.retrievalStrategy", s.retrievalStrategy)

quorumGroups := make(map[string]*quorumGroup)
for _, st := range stores {
st := st
if quorumGroups[st.GroupKey()] == nil {
groupCtx, cancel := context.WithCancel(ctx)
quorumGroups[st.GroupKey()] = &quorumGroup{
context: groupCtx,
cancel: cancel,
quorumGroupKey: st.GroupKey(),
quorumValue: 2,
quorumCounter: new(int64),
replicas: 1,
}
} else {
quorumGroups[st.GroupKey()].replicas++
}
}
level.Debug(reqLogger).Log("quorumGroups", quorumGroups)

respSet, err := newAsyncRespSet(ctx, st, r, s.responseTimeout, s.retrievalStrategy, &s.buffers, r.ShardInfo, reqLogger, s.metrics.emptyStreamResponses)
for _, st := range stores {
st := st
level.Debug(reqLogger).Log("store", st.String(), "store.group", st.GroupKey(), "store.replica", st.ReplicaKey())
level.Debug(reqLogger).Log("store response timeout", s.responseTimeout)
qg := quorumGroups[st.GroupKey()]
respSet, err := newAsyncRespSet(qg.context, st, r, s.responseTimeout, s.retrievalStrategy, &s.buffers, r.ShardInfo, reqLogger, s.metrics.emptyStreamResponses, qg)
if err != nil {
level.Warn(s.logger).Log("msg", "Store failure", "group", st.GroupKey(), "replica", st.ReplicaKey(), "err", err)
s.metrics.storeFailureCount.WithLabelValues(st.GroupKey(), st.ReplicaKey()).Inc()
Expand Down Expand Up @@ -418,6 +449,7 @@ func (s *ProxyStore) Series(originalRequest *storepb.SeriesRequest, srv storepb.
level.Debug(reqLogger).Log("msg", "Series: started fanout streams", "status", strings.Join(storeDebugMsgs, ";"))

var respHeap seriesStream = NewProxyResponseLoserTree(storeResponses...)

if s.enableDedup {
respHeap = NewResponseDeduplicatorInternal(respHeap, s.quorumChunkDedup)
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/store/proxy_merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"io"
"sort"
"sync"
"sync/atomic"

Check failure on line 12 in pkg/store/proxy_merge.go

View workflow job for this annotation

GitHub Actions / Linters (Static Analysis) for Go

package "sync/atomic" shouldn't be imported, suggested: "go.uber.org/atomic"
"time"

"github.com/cespare/xxhash/v2"
Expand Down Expand Up @@ -474,6 +475,7 @@
shardInfo *storepb.ShardInfo,
logger log.Logger,
emptyStreamResponses prometheus.Counter,
qg *quorumGroup,
) (respSet, error) {

var (
Expand Down Expand Up @@ -549,6 +551,7 @@
applySharding,
emptyStreamResponses,
labelsToRemove,
qg,
), nil
default:
panic(fmt.Sprintf("unsupported retrieval strategy %s", retrievalStrategy))
Expand Down Expand Up @@ -602,6 +605,7 @@
applySharding bool,
emptyStreamResponses prometheus.Counter,
removeLabels map[string]struct{},
qg *quorumGroup,
) respSet {
ret := &eagerRespSet{
span: span,
Expand Down Expand Up @@ -707,6 +711,10 @@
// Generally we need to resort here.
sortWithoutLabels(l.bufferedResponses, l.removeLabels)

if qg != nil && atomic.AddInt64(qg.quorumCounter, 1) == qg.quorumValue {
qg.cancel()
}

}(ret)

return ret
Expand Down
Loading