Commit e938ac0

[FIXED] Preserve max delivered messages with Interest retention (#6575)
Resolves #6538.

If a consumer reached max deliveries for a message, it should preserve the redelivered state and allow inspecting the message's content. However, if a new consumer was created and consumed this message as well, the message would still be removed under Interest retention. This PR fixes that by using the redelivered state to keep marking that there's interest.

The only downside is that the redelivered state gets cleaned up after a restart (this PR does not change/fix that). So if the consumer that had a max-delivery message keeps acknowledging messages and its acknowledgement floor moves up, the redelivered state below that ack floor gets cleaned up.

Honestly, I feel like keeping messages around once max delivery is reached makes the code very complex. It would be a lot cleaner if we only had the acknowledgement floor, the starting sequence, and the pending messages in between, not also redelivered state that can sit below the ack floor. It's not something we can change now, I suppose, but I'd be in favor of having messages automatically removed once max delivery is reached and all consumers have consumed the message. DLQ-style behavior would then be handled more explicitly (and reliably) by the client, for example by publishing into another stream and then TERMing the message, instead of relying on advisories that could be missed.

Signed-off-by: Maurice van Veen <github@mauricevanveen.com>
2 parents a42b2e9 + 6caa858 commit e938ac0
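As a rough illustration of the client-driven DLQ pattern the commit message argues for, here is a minimal sketch using the nats.go client. The `dlq.` subject prefix, the `handleWithDLQ` helper, and a pre-created stream capturing `dlq.>` are all hypothetical, not part of this change.

```go
package main

import "github.com/nats-io/nats.go"

// handleWithDLQ is a hypothetical helper: once a message has exhausted its
// deliveries, copy it into a DLQ stream and then TERM it, so an Interest
// stream can drop the original without relying on max-delivery advisories.
func handleWithDLQ(js nats.JetStreamContext, msg *nats.Msg, deliveries, maxDeliver int) error {
	if deliveries < maxDeliver {
		return msg.Nak() // not exhausted yet; ask the server to redeliver
	}
	// Publish the DLQ copy first. If this fails, the message stays pending
	// and the whole sequence is retried on the next delivery attempt.
	if _, err := js.Publish("dlq."+msg.Subject, msg.Data); err != nil {
		return err
	}
	// TERM stops redelivery; the DLQ copy is now owned by the client.
	return msg.Term()
}
```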

5 files changed: +185 −21

server/consumer.go

+25 −19
```diff
@@ -1985,11 +1985,16 @@ func (o *consumer) hasMaxDeliveries(seq uint64) bool {
 		if o.maxp > 0 && len(o.pending) >= o.maxp {
 			o.signalNewMessages()
 		}
-		// Cleanup our tracking.
-		delete(o.pending, seq)
-		if o.rdc != nil {
-			delete(o.rdc, seq)
+		// Make sure to remove from pending.
+		if p, ok := o.pending[seq]; ok && p != nil {
+			delete(o.pending, seq)
+			o.updateDelivered(p.Sequence, seq, dc, p.Timestamp)
+		}
+		// Ensure redelivered state is set, if not already.
+		if o.rdc == nil {
+			o.rdc = make(map[uint64]uint64)
 		}
+		o.rdc[seq] = dc
 		return true
 	}
 	return false
```
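This hunk is the core of the fix: on reaching max deliveries the consumer previously deleted both its pending entry and its redelivery count for the sequence; now it removes only the pending entry (recording the delivery via `updateDelivered`) and makes sure `o.rdc[seq]` stays populated. A toy model of the two policies, with the consumer's state reduced to two plain maps (not the server's types):

```go
package main

import "fmt"

// Toy model of the consumer's per-sequence tracking; not the server's types.
type tracking struct {
	pending map[uint64]bool   // delivered but not yet acked
	rdc     map[uint64]uint64 // redelivery counts
}

// Old behavior: forget the sequence entirely on max deliveries, so nothing
// signals remaining interest and Interest retention may drop the message.
func (t *tracking) maxDeliveriesOld(seq uint64) {
	delete(t.pending, seq)
	delete(t.rdc, seq)
}

// New behavior: only drop it from pending; the surviving rdc entry is what
// needAck (next hunks) uses to keep marking interest.
func (t *tracking) maxDeliveriesNew(seq, dc uint64) {
	delete(t.pending, seq)
	if t.rdc == nil {
		t.rdc = make(map[uint64]uint64)
	}
	t.rdc[seq] = dc
}

func main() {
	t := &tracking{pending: map[uint64]bool{7: true}}
	t.maxDeliveriesNew(7, 3)
	_, tracked := t.rdc[7]
	fmt.Println(tracked) // true: sequence 7 still counts as "of interest"
}
```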
```diff
@@ -3264,6 +3269,7 @@ func (o *consumer) needAck(sseq uint64, subj string) bool {
 	var needAck bool
 	var asflr, osseq uint64
 	var pending map[uint64]*Pending
+	var rdc map[uint64]uint64
 
 	o.mu.RLock()
 	defer o.mu.RUnlock()
@@ -3288,7 +3294,7 @@ func (o *consumer) needAck(sseq uint64, subj string) bool {
 	}
 	if o.isLeader() {
 		asflr, osseq = o.asflr, o.sseq
-		pending = o.pending
+		pending, rdc = o.pending, o.rdc
 	} else {
 		if o.store == nil {
 			return false
@@ -3299,7 +3305,7 @@ func (o *consumer) needAck(sseq uint64, subj string) bool {
 			return sseq > o.asflr && !o.isFiltered()
 		}
 		// If loading state as here, the osseq is +1.
-		asflr, osseq, pending = state.AckFloor.Stream, state.Delivered.Stream+1, state.Pending
+		asflr, osseq, pending, rdc = state.AckFloor.Stream, state.Delivered.Stream+1, state.Pending, state.Redelivered
 	}
 
 	switch o.cfg.AckPolicy {
@@ -3315,6 +3321,12 @@ func (o *consumer) needAck(sseq uint64, subj string) bool {
 		}
 	}
 
+	// Finally check if redelivery of this message is tracked.
+	// If the message is not pending, it should be preserved if it reached max delivery.
+	if !needAck {
+		_, needAck = rdc[sseq]
+	}
+
 	return needAck
 }
```
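The `needAck` hunks above wire the preserved state into the interest check: `rdc` is captured alongside `pending` (from memory on the leader, from `state.Redelivered` otherwise), and a sequence that is no longer pending still reports as needing an ack when its redelivery is tracked. A condensed sketch of that final decision, using plain maps rather than the server's types:

```go
// Condensed sketch (plain maps, not the server's types) of needAck's final
// decision after this commit: a sequence still "needs" an ack if it is
// pending, or if its redelivery count survived reaching max deliveries.
func stillNeeded(seq uint64, pending map[uint64]bool, rdc map[uint64]uint64) bool {
	if pending[seq] {
		return true
	}
	_, maxDelivered := rdc[seq]
	return maxDelivered
}
```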
```diff
@@ -3917,7 +3929,10 @@ func (o *consumer) deliveryCount(seq uint64) uint64 {
 	if o.rdc == nil {
 		return 1
 	}
-	return o.rdc[seq]
+	if dc := o.rdc[seq]; dc >= 1 {
+		return dc
+	}
+	return 1
 }
 
 // Increase the delivery count for this message.
```
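This guards against a Go map zero-value pitfall that matters now that entries deliberately stay in `o.rdc`: reading a missing key from a `map[uint64]uint64` yields `0`, so the old `return o.rdc[seq]` reported a delivery count of zero for sequences that were never redelivered. The fixed accessor clamps to a minimum of one, and the following hunks in `checkAckFloor` and `decStreamPending` swap their inline copies of this lookup for the helper. A standalone illustration:

```go
package main

import "fmt"

func main() {
	rdc := make(map[uint64]uint64) // allocated but empty
	fmt.Println(rdc[42])           // 0: a missing key yields the zero value

	// The fixed accessor never reports fewer than one delivery.
	deliveryCount := func(seq uint64) uint64 {
		if dc := rdc[seq]; dc >= 1 {
			return dc
		}
		return 1
	}
	fmt.Println(deliveryCount(42)) // 1
}
```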
```diff
@@ -4231,10 +4246,7 @@ func (o *consumer) checkAckFloor() {
 		// Check if this message was pending.
 		o.mu.RLock()
 		p, isPending := o.pending[seq]
-		var rdc uint64 = 1
-		if o.rdc != nil {
-			rdc = o.rdc[seq]
-		}
+		rdc := o.deliveryCount(seq)
 		o.mu.RUnlock()
 		// If it was pending for us, get rid of it.
 		if isPending {
@@ -4252,10 +4264,7 @@ func (o *consumer) checkAckFloor() {
 			if p != nil {
 				dseq = p.Sequence
 			}
-			var rdc uint64 = 1
-			if o.rdc != nil {
-				rdc = o.rdc[seq]
-			}
+			rdc := o.deliveryCount(seq)
 			toTerm = append(toTerm, seq, dseq, rdc)
 		}
 	}
```
```diff
@@ -5861,10 +5870,7 @@ func (o *consumer) decStreamPending(sseq uint64, subj string) {
 
 	// Check if this message was pending.
 	p, wasPending := o.pending[sseq]
-	var rdc uint64 = 1
-	if o.rdc != nil {
-		rdc = o.rdc[sseq]
-	}
+	rdc := o.deliveryCount(sseq)
 
 	o.mu.Unlock()
```
server/jetstream_cluster_1_test.go

+1 −1
```diff
@@ -6624,7 +6624,7 @@ func TestJetStreamClusterMaxDeliveriesOnInterestStreams(t *testing.T) {
 		require_Equal(t, ci.AckFloor.Consumer, 1)
 		require_Equal(t, ci.AckFloor.Stream, 1)
 		require_Equal(t, ci.NumAckPending, 0)
-		require_Equal(t, ci.NumRedelivered, 0)
+		require_Equal(t, ci.NumRedelivered, 1)
 		require_Equal(t, ci.NumPending, 0)
 	}
 }
```

server/jetstream_cluster_3_test.go

+1 −1
```diff
@@ -5426,7 +5426,7 @@ func TestJetStreamClusterConsumerMaxDeliveryNumAckPendingBug(t *testing.T) {
 		require_Equal(t, a.AckFloor.Stream, 10)
 	}
 	require_Equal(t, a.NumPending, 40)
-	require_Equal(t, a.NumRedelivered, 0)
+	require_Equal(t, a.NumRedelivered, 10)
 	a.Cluster, b.Cluster = nil, nil
 	a.Delivered.Last, b.Delivered.Last = nil, nil
 	if !reflect.DeepEqual(a, b) {
```

server/jetstream_consumer_test.go

+53
```diff
@@ -2714,3 +2714,56 @@ func TestJetStreamConsumerMessageDeletedDuringRedelivery(t *testing.T) {
 		})
 	}
 }
+
+func TestJetStreamConsumerDeliveryCount(t *testing.T) {
+	s := RunBasicJetStreamServer(t)
+	defer s.Shutdown()
+
+	nc, js := jsClientConnect(t, s)
+	defer nc.Close()
+
+	_, err := js.AddStream(&nats.StreamConfig{
+		Name:     "TEST",
+		Subjects: []string{"foo"},
+	})
+	require_NoError(t, err)
+
+	for i := 0; i < 2; i++ {
+		_, err = js.Publish("foo", nil)
+		require_NoError(t, err)
+	}
+
+	sub, err := js.PullSubscribe(
+		"foo",
+		"CONSUMER",
+		nats.ManualAck(),
+		nats.AckExplicit(),
+		nats.AckWait(time.Second),
+		nats.MaxDeliver(1),
+	)
+	require_NoError(t, err)
+
+	acc, err := s.lookupAccount(globalAccountName)
+	require_NoError(t, err)
+	mset, err := acc.lookupStream("TEST")
+	require_NoError(t, err)
+	o := mset.lookupConsumer("CONSUMER")
+	require_NotNil(t, o)
+
+	msgs, err := sub.Fetch(2)
+	require_NoError(t, err)
+	require_Len(t, len(msgs), 2)
+	require_NoError(t, msgs[1].Nak())
+
+	require_Equal(t, o.deliveryCount(1), 1)
+	require_Equal(t, o.deliveryCount(2), 1)
+
+	// max deliver 1 so this will fail
+	_, err = sub.Fetch(1, nats.MaxWait(250*time.Millisecond))
+	require_Error(t, err)
+
+	// This would previously report delivery count 0, because o.rdc != nil.
+	require_Equal(t, o.deliveryCount(1), 1)
+	require_Equal(t, o.deliveryCount(2), 1)
+}
```

server/jetstream_test.go

+105
```diff
@@ -25542,3 +25542,108 @@ func TestJetStreamSubjectDeleteMarkersAfterPurgeNoMarkers(t *testing.T) {
 		})
 	}
 }
+
+// https://github.com/nats-io/nats-server/issues/6538
+func TestJetStreamInterestMaxDeliveryReached(t *testing.T) {
+	maxWait := 250 * time.Millisecond
+	for _, useNak := range []bool{true, false} {
+		for _, test := range []struct {
+			title  string
+			action func(s *Server, sub *nats.Subscription)
+		}{
+			{
+				title: "fetch",
+				action: func(s *Server, sub *nats.Subscription) {
+					time.Sleep(time.Second)
+
+					// max deliver 1 so this will fail
+					_, err := sub.Fetch(1, nats.MaxWait(maxWait))
+					require_Error(t, err)
+				},
+			},
+			{
+				title: "expire pending",
+				action: func(s *Server, sub *nats.Subscription) {
+					acc, err := s.lookupAccount(globalAccountName)
+					require_NoError(t, err)
+					mset, err := acc.lookupStream("TEST")
+					require_NoError(t, err)
+					o := mset.lookupConsumer("consumer")
+					require_NotNil(t, o)
+
+					o.mu.Lock()
+					o.forceExpirePending()
+					o.mu.Unlock()
+				},
+			},
+		} {
+			title := fmt.Sprintf("nak/%s", test.title)
+			if !useNak {
+				title = fmt.Sprintf("no-%s", title)
+			}
+			t.Run(title, func(t *testing.T) {
+				s := RunBasicJetStreamServer(t)
+				defer s.Shutdown()
+
+				nc, js := jsClientConnect(t, s)
+				defer nc.Close()
+
+				_, err := js.AddStream(&nats.StreamConfig{
+					Name:      "TEST",
+					Storage:   nats.FileStorage,
+					Subjects:  []string{"test"},
+					Replicas:  1,
+					Retention: nats.InterestPolicy,
+				})
+				require_NoError(t, err)
+
+				sub, err := js.PullSubscribe("test", "consumer", nats.AckWait(time.Second), nats.MaxDeliver(1))
+				require_NoError(t, err)
+
+				_, err = nc.Request("test", []byte("hello"), maxWait)
+				require_NoError(t, err)
+
+				nfo, err := js.StreamInfo("TEST")
+				require_NoError(t, err)
+				require_Equal(t, nfo.State.Msgs, uint64(1))
+
+				msg, err := sub.Fetch(1, nats.MaxWait(maxWait))
+				require_NoError(t, err)
+				require_Len(t, 1, len(msg))
+				if useNak {
+					require_NoError(t, msg[0].Nak())
+				}
+
+				cnfo, err := js.ConsumerInfo("TEST", "consumer")
+				require_NoError(t, err)
+				require_Equal(t, cnfo.NumAckPending, 1)
+
+				test.action(s, sub)
+
+				// max deliver 1 so this will fail
+				_, err = sub.Fetch(1, nats.MaxWait(maxWait))
+				require_Error(t, err)
+
+				cnfo, err = js.ConsumerInfo("TEST", "consumer")
+				require_NoError(t, err)
+				require_Equal(t, cnfo.NumAckPending, 0)
+
+				nfo, err = js.StreamInfo("TEST")
+				require_NoError(t, err)
+				require_Equal(t, nfo.State.Msgs, uint64(1))
+
+				sub2, err := js.PullSubscribe("test", "consumer2")
+				require_NoError(t, err)
+
+				msg, err = sub2.Fetch(1)
+				require_NoError(t, err)
+				require_Len(t, 1, len(msg))
+				require_NoError(t, msg[0].AckSync())
+
+				nfo, err = js.StreamInfo("TEST")
+				require_NoError(t, err)
+				require_Equal(t, nfo.State.Msgs, uint64(1))
+			})
+		}
+	}
+}
```
