Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle DNS SERVFAIL differently than NXDOMAIN #2776

Merged
merged 14 commits into from
Apr 11, 2024
24 changes: 24 additions & 0 deletions servicetalk-dns-discovery-netty/gradle/checkstyle/suppressions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright © 2018 Apple Inc. and the ServiceTalk project authors
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!DOCTYPE suppressions PUBLIC
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
"https://checkstyle.org/dtds/suppressions_1_2.dtd">

<suppressions>
<suppress checks="StaticVariableName"
files="io[\\/]servicetalk[\\/]dns[\\/]discovery[\\/]netty[\\/]DefaultDnsServiceDiscovererBuilder.java"/>
</suppressions>
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,8 @@
<Source name="~.*Test\.java"/>
<Bug pattern="THROWS_METHOD_THROWS_CLAUSE_THROWABLE"/>
</Match>
<Match>
<Source name="~.*Test\.java"/>
<Bug pattern="ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD"/>
</Match>
</FindBugsFilter>
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@

import static io.netty.handler.codec.dns.DefaultDnsRecordDecoder.decodeName;
import static io.netty.handler.codec.dns.DnsRecordType.SRV;
import static io.netty.handler.codec.dns.DnsResponseCode.NXDOMAIN;
import static io.servicetalk.client.api.ServiceDiscovererEvent.Status.AVAILABLE;
import static io.servicetalk.concurrent.api.AsyncCloseables.toAsyncCloseable;
import static io.servicetalk.concurrent.api.Completable.completed;
Expand All @@ -95,6 +96,7 @@
import static io.servicetalk.concurrent.internal.SubscriberUtils.newExceptionForInvalidRequestN;
import static io.servicetalk.concurrent.internal.SubscriberUtils.safeOnError;
import static io.servicetalk.concurrent.internal.ThrowableUtils.unknownStackTrace;
import static io.servicetalk.dns.discovery.netty.DefaultDnsServiceDiscovererBuilder.NX_DOMAIN_INVALIDATES;
import static io.servicetalk.dns.discovery.netty.DnsClients.mapEventList;
import static io.servicetalk.dns.discovery.netty.DnsResolverAddressTypes.IPV4_PREFERRED;
import static io.servicetalk.dns.discovery.netty.DnsResolverAddressTypes.IPV6_PREFERRED;
Expand Down Expand Up @@ -135,15 +137,14 @@ final class DefaultDnsClient implements DnsClient {
private final IntFunction<? extends Completable> srvHostNameRepeater;
private final int srvConcurrency;
private final boolean srvFilterDuplicateEvents;
private final boolean inactiveEventsOnError;
private final DnsResolverAddressTypes addressTypes;
private final String id;
private boolean closed;

DefaultDnsClient(final String id, final IoExecutor ioExecutor, final int consolidateCacheSize,
final int minTTL, final int maxTTL, final int minCacheTTL, final int maxCacheTTL,
final int negativeTTLCacheSeconds, final long ttlJitterNanos,
final int srvConcurrency, final boolean inactiveEventsOnError,
final int srvConcurrency,
final boolean completeOncePreferredResolved, final boolean srvFilterDuplicateEvents,
Duration srvHostNameRepeatInitialDelay, Duration srvHostNameRepeatJitter,
@Nullable Integer maxUdpPayloadSize, @Nullable final Integer ndots,
Expand All @@ -155,7 +156,6 @@ final class DefaultDnsClient implements DnsClient {
final ServiceDiscovererEvent.Status missingRecordStatus) {
this.srvConcurrency = srvConcurrency;
this.srvFilterDuplicateEvents = srvFilterDuplicateEvents;
this.inactiveEventsOnError = inactiveEventsOnError;
// Implementation of this class expects to use only single EventLoop from IoExecutor
this.nettyIoExecutor = toEventLoopAwareNettyIoExecutor(ioExecutor).next();
// We must use nettyIoExecutor for the repeater for thread safety!
Expand Down Expand Up @@ -252,9 +252,8 @@ public Publisher<Collection<ServiceDiscovererEvent<InetAddress>>> dnsQuery(final
return defer(() -> {
final DnsDiscoveryObserver discoveryObserver = newDiscoveryObserver(address);
ARecordPublisher pub = new ARecordPublisher(address, discoveryObserver);
Publisher<? extends Collection<ServiceDiscovererEvent<InetAddress>>> events = inactiveEventsOnError ?
recoverWithInactiveEvents(pub, false) :
pub;
Publisher<? extends Collection<ServiceDiscovererEvent<InetAddress>>> events =
recoverWithInactiveEvents(pub, false);
return discoveryObserver == null ? events : events.beforeFinally(new TerminalSignalConsumer() {
@Override
public void onComplete() {
Expand Down Expand Up @@ -338,7 +337,7 @@ public Publisher<Collection<ServiceDiscovererEvent<InetSocketAddress>>> dnsSrvQu
return empty();
}
}, srvConcurrency)
.liftSync(inactiveEventsOnError ? SrvInactiveCombinerOperator.EMIT : SrvInactiveCombinerOperator.NO_EMIT);
.liftSync(SrvInactiveCombinerOperator.EMIT);

return discoveryObserver == null ? events : events.beforeFinally(new TerminalSignalConsumer() {
@Override
Expand Down Expand Up @@ -955,7 +954,7 @@ private static <T, A> Publisher<? extends Collection<ServiceDiscovererEvent<T>>>
AbstractDnsPublisher<T> pub, boolean generateAggregateEvent) {
return pub.onErrorResume(cause -> {
AbstractDnsPublisher<T>.AbstractDnsSubscription subscription = pub.subscription;
if (subscription != null) {
if (subscription != null && shouldRevokeState(cause)) {
List<ServiceDiscovererEvent<T>> events = subscription.generateInactiveEvent();
if (!events.isEmpty()) {
return (generateAggregateEvent ? Publisher.<List<ServiceDiscovererEvent<T>>>from(
Expand All @@ -968,6 +967,15 @@ private static <T, A> Publisher<? extends Collection<ServiceDiscovererEvent<T>>>
});
}

private static boolean shouldRevokeState(final Throwable t) {
// ISE => Subscriber exceptions (downstream of retry)
return t instanceof SrvAddressRemovedException || t instanceof IllegalStateException ||
t instanceof ClosedDnsServiceDiscovererException || (NX_DOMAIN_INVALIDATES &&
// string matching is done on purpose to avoid the hard Netty dependency
(t.getCause() != null && t.getCause().getClass().getSimpleName().contains("DnsErrorCauseException")) &&
NXDOMAIN.equals(((io.netty.resolver.dns.DnsErrorCauseException) t.getCause()).getCode()));
}

private static <T> Publisher<T> newDuplicateSrv(String serviceName, String resolvedAddress) {
return failed(new IllegalStateException("Duplicate SRV entry for SRV name " + serviceName + " for address " +
resolvedAddress));
Expand All @@ -982,7 +990,6 @@ private static final class SrvInactiveCombinerOperator implements
PublisherOperator<Collection<ServiceDiscovererEvent<InetSocketAddress>>,
Collection<ServiceDiscovererEvent<InetSocketAddress>>> {
static final SrvInactiveCombinerOperator EMIT = new SrvInactiveCombinerOperator(true);
static final SrvInactiveCombinerOperator NO_EMIT = new SrvInactiveCombinerOperator(false);
private final boolean emitAggregatedEvents;

private SrvInactiveCombinerOperator(boolean emitAggregatedEvents) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ public final class DefaultDnsServiceDiscovererBuilder implements DnsServiceDisco
*/
@Deprecated // FIXME: 0.43 - consider removing this system property
private static final String SKIP_BINDING_PROPERTY = "io.servicetalk.dns.discovery.netty.skipBinding";
private static final String NX_DOMAIN_INVALIDATES_PROPERTY = "io.servicetalk.dns.discovery.nxdomain.invalidation";
@SuppressWarnings("PMD.MutableStaticState")
static boolean NX_DOMAIN_INVALIDATES = parseProperty(NX_DOMAIN_INVALIDATES_PROPERTY, true);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only thing that worries me is that if DNS server had a blip or if someone removed entries before adding new entries, DNS client could receive NXDOMAIN, then invalidate all entries, and then end up in an awkward state without any addresses until another resolution happens. Could you please elaborate a bit more on why it is important to invalidate addresses in this case? Can we just wait for another result set (keep using pre-existing IPs) while we retry UnknownHostException?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NXDOMAIN is not / should not, be a blip. Its a valid answer that declares that DNS has no knowledge of that domain. The resolver makes every effort to resolve from the nameservers before it gives up. DNS misbehavior (such as SERVFAIL or timeouts) will still be handled more gracefully. Every other case (e.g., removed entries before adding entries) are flows that are beyond the scope of a resolver to heuristically work around, we should propagate the correct state at all times, which makes it easier to debug and reason about.

@Nullable
private static final SocketAddress DEFAULT_LOCAL_ADDRESS =
getBoolean(SKIP_BINDING_PROPERTY) ? null : new InetSocketAddress(0);
Expand Down Expand Up @@ -101,6 +104,7 @@ public final class DefaultDnsServiceDiscovererBuilder implements DnsServiceDisco
LOGGER.debug("-D{}={}", NEGATIVE_TTL_CACHE_SECONDS_PROPERTY, negativeCacheTtlValue);
LOGGER.debug("Default negative TTL cache in seconds: {}", DEFAULT_NEGATIVE_TTL_CACHE_SECONDS);
LOGGER.debug("Default missing records status: {}", DEFAULT_MISSING_RECOREDS_STATUS);
LOGGER.debug("-D{}: {}", NX_DOMAIN_INVALIDATES_PROPERTY, NX_DOMAIN_INVALIDATES);
}
}

Expand Down Expand Up @@ -128,7 +132,6 @@ public final class DefaultDnsServiceDiscovererBuilder implements DnsServiceDisco
private int negativeTTLCacheSeconds = DEFAULT_NEGATIVE_TTL_CACHE_SECONDS;
private Duration ttlJitter = ofSeconds(DEFAULT_TTL_POLL_JITTER_SECONDS);
private int srvConcurrency = 2048;
private boolean inactiveEventsOnError;
private boolean completeOncePreferredResolved = true;
private boolean srvFilterDuplicateEvents;
private Duration srvHostNameRepeatInitialDelay = ofSeconds(10);
Expand Down Expand Up @@ -300,11 +303,6 @@ public DefaultDnsServiceDiscovererBuilder missingRecordStatus(ServiceDiscovererE
return asHostAndPortDiscoverer(build());
}

DefaultDnsServiceDiscovererBuilder inactiveEventsOnError(boolean inactiveEventsOnError) {
this.inactiveEventsOnError = inactiveEventsOnError;
return this;
}

DefaultDnsServiceDiscovererBuilder srvConcurrency(int srvConcurrency) {
this.srvConcurrency = ensurePositive(srvConcurrency, "srvConcurrency");
return this;
Expand All @@ -317,8 +315,11 @@ DefaultDnsServiceDiscovererBuilder completeOncePreferredResolved(boolean complet

DefaultDnsServiceDiscovererBuilder srvHostNameRepeatDelay(
Duration initialDelay, Duration jitter) {
this.srvHostNameRepeatInitialDelay = requireNonNull(initialDelay);
this.srvHostNameRepeatJitter = requireNonNull(jitter);
this.srvHostNameRepeatInitialDelay = ensurePositive(initialDelay, "srvHostNameRepeatInitialDelay");
this.srvHostNameRepeatJitter = ensurePositive(jitter, "srvHostNameRepeatJitter");
if (srvHostNameRepeatJitter.toNanos() >= srvHostNameRepeatInitialDelay.toNanos()) {
throw new IllegalArgumentException("The jitter value should be less than the initial delay.");
}
return this;
}

Expand Down Expand Up @@ -369,7 +370,7 @@ DnsClient build() {
ioExecutor == null ? globalExecutionContext().ioExecutor() : ioExecutor, consolidateCacheSize,
minTTLSeconds, maxTTLSeconds, minTTLCacheSeconds, maxTTLCacheSeconds, negativeTTLCacheSeconds,
ttlJitter.toNanos(),
srvConcurrency, inactiveEventsOnError, completeOncePreferredResolved, srvFilterDuplicateEvents,
srvConcurrency, completeOncePreferredResolved, srvFilterDuplicateEvents,
srvHostNameRepeatInitialDelay, srvHostNameRepeatJitter, maxUdpPayloadSize, ndots, optResourceEnabled,
queryTimeout, dnsResolverAddressTypes, localAddress, dnsServerAddressStreamProvider, observer,
missingRecordStatus);
Expand All @@ -389,4 +390,12 @@ private static int parseProperty(final String name, final int defaultValue) {
return defaultValue;
}
}

private static boolean parseProperty(final String name, final boolean defaultValue) {
final String value = getProperty(name);
if (value == null) {
return defaultValue;
}
return Boolean.parseBoolean(value);
}
}
Loading
Loading