Skip to content

Commit

Permalink
Merge branch 'apache:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
rayluoluo authored Jan 13, 2025
2 parents 0ac4a4b + ba04a43 commit 6a2ebad
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 0 deletions.
60 changes: 60 additions & 0 deletions pip/pip-399.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

# PIP-399: Fix Metric Name for Delayed Queue

# Background knowledge

Pulsar delayed delivery is a feature that allows messages to be delivered to consumers after a certain delay.
Pulsar exposes a metric to monitor the memory usage of the delayed queue.

# Motivation

There is already a metric called `pulsar_delayed_message_index_size_bytes` that reports the total memory used by the delayed queue of **one topic**.
```
writeMetric(stream, "pulsar_delayed_message_index_size_bytes", stats.delayedTrackerMemoryUsage,
cluster, namespace, topic, splitTopicAndPartitionIndexLabel);
```

However, the metric for **one sub** is also called `pulsar_delayed_message_index_size_bytes`, which does not conform to the metric naming convention and is confusing.
```
writeSubscriptionMetric(stream, "pulsar_delayed_message_index_size_bytes",
subsStats.delayedTrackerMemoryUsage, cluster, namespace, topic, sub, splitTopicAndPartitionIndexLabel);
```

Currently, the exported metrics look like this:
```
# TYPE pulsar_delayed_message_index_size_bytes gauge
pulsar_delayed_message_index_size_bytes{cluster="MyPulsar",namespace="public/default",topic="persistent://public/default/testNack-partition-0"} 0
pulsar_delayed_message_index_size_bytes{cluster="MyPulsar",namespace="public/default",topic="persistent://public/default/testNack-partition-0",subscription="sub2"} 0
```

The topic-level and subscription-level metrics are mixed together under the same name. To select only the topic-level metric and exclude the subscription-level ones, we need to use a PromQL query like:
`pulsar_delayed_message_index_size_bytes{subscription=""}`
This is awkward and not user-friendly.


# Goals

Rename the metric for **one sub** to `pulsar_subscription_delayed_message_index_size_bytes`.


# Backward & Forward Compatibility

## Upgrade

Rename the metric for **one sub** to `pulsar_subscription_delayed_message_index_size_bytes`.

## Downgrade / Rollback

Rename the metric for **one sub** back to `pulsar_delayed_message_index_size_bytes`.



# General Notes

# Links

<!--
Updated afterwards
-->
* Mailing List discussion thread: https://lists.apache.org/thread/b8rqld3cww1t34zntgmld50yz34lxx1d
* Mailing List voting thread: https://lists.apache.org/thread/cyyx29ggjdpbr3kq5vvd6tk83f9vc112
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pulsar.broker.service.persistent;

import static org.apache.pulsar.common.util.Runnables.catchingAndLoggingThrowables;
import com.google.common.annotations.VisibleForTesting;
import io.netty.buffer.ByteBuf;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
Expand Down Expand Up @@ -220,6 +221,23 @@ private void receiveSubscriptionUpdated(ReplicatedSubscriptionsUpdate update) {
private void startNewSnapshot() {
cleanupTimedOutSnapshots();

if (lastCompletedSnapshotStartTime == 0 && !pendingSnapshots.isEmpty()) {
// 1. If the remote cluster has disabled subscription replication or there's an incorrect config,
// it will not respond to SNAPSHOT_REQUEST. Therefore, lastCompletedSnapshotStartTime will remain 0,
// making it unnecessary to resend the request.
// 2. This approach prevents sending additional SNAPSHOT_REQUEST to both local_topic and remote_topic.
// 3. Since it's uncertain when the remote cluster will enable subscription replication,
// the timeout mechanism of pendingSnapshots is used to ensure retries.
//
// In other words, when hit this case, The frequency of sending SNAPSHOT_REQUEST
// will use `replicatedSubscriptionsSnapshotTimeoutSeconds`.
if (log.isDebugEnabled()) {
log.debug("[{}] PendingSnapshot exists but has never succeeded. "
+ "Skipping snapshot creation until pending snapshot timeout.", topic.getName());
}
return;
}

if (topic.getLastMaxReadPositionMovedForwardTimestamp() < lastCompletedSnapshotStartTime
|| topic.getLastMaxReadPositionMovedForwardTimestamp() == 0) {
// There was no message written since the last snapshot, we can skip creating a new snapshot
Expand Down Expand Up @@ -324,6 +342,11 @@ String localCluster() {
return localCluster;
}

/**
 * Exposes the pending-snapshot map so that tests can inspect it.
 *
 * @return the {@code pendingSnapshots} map itself (the same instance, not a copy)
 */
@VisibleForTesting
public ConcurrentMap<String, ReplicatedSubscriptionsSnapshotBuilder> pendingSnapshots() {
    return this.pendingSnapshots;
}

@Override
public boolean isMarkerMessage() {
// Everything published by this controller will be a marker a message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.api.SubscriptionInitialPosition;
import org.apache.pulsar.client.api.SubscriptionType;
import org.apache.pulsar.client.api.TransactionIsolationLevel;
import org.apache.pulsar.client.impl.MessageImpl;
import org.apache.pulsar.common.api.proto.MarkerType;
import org.apache.pulsar.common.api.proto.MessageMetadata;
import org.apache.pulsar.common.policies.data.PartitionedTopicStats;
import org.apache.pulsar.common.policies.data.PersistentTopicInternalStats;
import org.apache.pulsar.common.policies.data.TenantInfoImpl;
Expand Down Expand Up @@ -1002,6 +1006,92 @@ public void testReplicatedSubscriptionWithCompaction() throws Exception {
Assert.assertEquals(result, List.of("V2"));
}

/**
 * Checks how often snapshot requests are sent when the remote cluster never completes a snapshot
 * (per the original test comment, cluster4 has replicated subscriptions disabled).
 *
 * <p>With the snapshot frequency lowered to 100 ms and the snapshot timeout lowered to 2 s on
 * {@code config1}, the test expects:
 * <ul>
 *   <li>after three frequency intervals: exactly one pending snapshot on cluster1 and exactly one
 *       snapshot-request marker visible on cluster4;</li>
 *   <li>after one further timeout period: exactly two snapshot-request markers on cluster4.</li>
 * </ul>
 *
 * <p>The overridden {@code config1} values are restored in a {@code finally} block so that a
 * failing assertion cannot leak the shortened settings into other tests sharing {@code config1}.
 */
@Test
public void testReplicatedSubscriptionOneWay() throws Exception {
    final String namespace = BrokerTestUtil.newUniqueName("pulsar-r4/replicatedsubscription");
    final String topicName = "persistent://" + namespace + "/one-way";
    // Remember the current settings before overriding them for this test.
    int defaultSubscriptionsSnapshotFrequency = config1.getReplicatedSubscriptionsSnapshotFrequencyMillis();
    int defaultSubscriptionsSnapshotTimeout = config1.getReplicatedSubscriptionsSnapshotTimeoutSeconds();
    try {
        config1.setReplicatedSubscriptionsSnapshotTimeoutSeconds(2);
        config1.setReplicatedSubscriptionsSnapshotFrequencyMillis(100);

        // cluster4 disabled ReplicatedSubscriptions
        admin1.tenants().createTenant("pulsar-r4",
                new TenantInfoImpl(Sets.newHashSet("appid1", "appid4"), Sets.newHashSet(cluster1, cluster4)));
        admin1.namespaces().createNamespace(namespace);
        admin1.namespaces().setNamespaceReplicationClusters(namespace, Sets.newHashSet(cluster1, cluster4));

        String subscriptionName = "cluster-subscription";
        boolean replicateSubscriptionState = true;

        @Cleanup
        PulsarClient client1 = PulsarClient.builder().serviceUrl(url1.toString())
                .statsInterval(0, TimeUnit.SECONDS)
                .build();

        @Cleanup
        final PulsarClient client4 = PulsarClient.builder().serviceUrl(url4.toString())
                .statsInterval(0, TimeUnit.SECONDS)
                .build();

        // create subscription in cluster1
        createReplicatedSubscription(client1, topicName, subscriptionName, replicateSubscriptionState);
        // create subscription in cluster4
        createReplicatedSubscription(client4, topicName, subscriptionName, replicateSubscriptionState);

        // send messages in cluster1
        @Cleanup
        Producer<byte[]> producer = client1.newProducer().topic(topicName)
                .enableBatching(false)
                .messageRoutingMode(MessageRoutingMode.SinglePartition)
                .create();
        int numMessages = 6;
        for (int i = 0; i < numMessages; i++) {
            String body = "message" + i;
            producer.send(body.getBytes(StandardCharsets.UTF_8));
        }
        producer.close();

        // wait for snapshot marker request to be replicated
        Thread.sleep(3 * config1.getReplicatedSubscriptionsSnapshotFrequencyMillis());

        // Assert just have 1 pending snapshot in cluster1
        final PersistentTopic topic1 =
                (PersistentTopic) pulsar1.getBrokerService().getTopic(topicName, false).join().get();
        ReplicatedSubscriptionsController r1Controller =
                topic1.getReplicatedSubscriptionController().get();
        assertEquals(r1Controller.pendingSnapshots().size(), 1);

        // Assert cluster4 just receive 1 snapshot request msg
        Assert.assertEquals(countSnapshotRequestMarkers(topicName, subscriptionName), 1);

        // Wait pending snapshot timeout; a second request is expected once it has elapsed
        Thread.sleep(config1.getReplicatedSubscriptionsSnapshotTimeoutSeconds() * 1000);
        Assert.assertEquals(countSnapshotRequestMarkers(topicName, subscriptionName), 2);
    } finally {
        // Set back to default config, even when an assertion above failed.
        config1.setReplicatedSubscriptionsSnapshotTimeoutSeconds(defaultSubscriptionsSnapshotTimeout);
        config1.setReplicatedSubscriptionsSnapshotFrequencyMillis(defaultSubscriptionsSnapshotFrequency);
    }
}

/**
 * Counts the snapshot-request marker messages returned by peeking up to 100 messages of the
 * given subscription through {@code admin4}.
 *
 * @param topicName topic to peek
 * @param subscriptionName subscription to peek
 * @return number of peeked messages whose marker type is
 *         {@code REPLICATED_SUBSCRIPTION_SNAPSHOT_REQUEST}
 * @throws Exception if the admin peek call fails
 */
private int countSnapshotRequestMarkers(String topicName, String subscriptionName) throws Exception {
    int numSnapshotRequest = 0;
    List<Message<byte[]>> r4Messages = admin4.topics()
            .peekMessages(topicName, subscriptionName, 100, true, TransactionIsolationLevel.READ_UNCOMMITTED);
    for (Message<byte[]> r4Message : r4Messages) {
        MessageMetadata msgMetadata = ((MessageImpl<byte[]>) r4Message).getMessageBuilder();
        if (msgMetadata.hasMarkerType()
                && msgMetadata.getMarkerType() == MarkerType.REPLICATED_SUBSCRIPTION_SNAPSHOT_REQUEST_VALUE) {
            numSnapshotRequest++;
        }
    }
    return numSnapshotRequest;
}

/**
* Disable replication subscription.
* Test scheduled task case.
Expand Down

0 comments on commit 6a2ebad

Please sign in to comment.