Skip to content

Commit

Permalink
Reimplement Multi.flatMapIterable + TCK test (#1467)
Browse files Browse the repository at this point in the history
  • Loading branch information
akarnokd authored Mar 6, 2020
1 parent 859745f commit 1673ddf
Show file tree
Hide file tree
Showing 7 changed files with 723 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,23 @@ default <U> Multi<U> flatMap(Function<T, Flow.Publisher<U>> mapper, long maxConc
* @param <U> output item type
* @return Multi
*/
default <U> Multi<U> flatMapIterable(Function<T, Iterable<U>> iterableMapper) {
MultiFlatMapProcessor<T, U> processor = MultiFlatMapProcessor.fromIterableMapper(iterableMapper);
this.subscribe(processor);
return processor;
default <U> Multi<U> flatMapIterable(Function<? super T, ? extends Iterable<? extends U>> iterableMapper) {
return flatMapIterable(iterableMapper, 32);
}

/**
* Transform item with supplied function and flatten resulting {@link Iterable} to downstream.
*
* @param iterableMapper {@link Function} receiving item as parameter and returning {@link Iterable}
* @param prefetch the number of upstream items to request upfront, then 75% of this value after
* 75% received and mapped
* @param <U> output item type
* @return Multi
*/
default <U> Multi<U> flatMapIterable(Function<? super T, ? extends Iterable<? extends U>> iterableMapper,
int prefetch) {
Objects.requireNonNull(iterableMapper, "iterableMapper is null");
return new MultiFlatMapIterable<>(this, iterableMapper, prefetch);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
/*
* Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package io.helidon.common.reactive;

import java.util.Iterator;
import java.util.Objects;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Flow;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

/**
* Map each upstream item into an Iterable and stream their values.
* @param <T> the upstream item type
* @param <R> the output item type
*/
final class MultiFlatMapIterable<T, R> implements Multi<R> {

private final Multi<T> source;

private final Function<? super T, ? extends Iterable<? extends R>> mapper;

private final int prefetch;

MultiFlatMapIterable(Multi<T> source,
Function<? super T, ? extends Iterable<? extends R>> mapper,
int prefetch) {
this.source = source;
this.mapper = mapper;
this.prefetch = prefetch;
}

@Override
public void subscribe(Flow.Subscriber<? super R> subscriber) {
source.subscribe(new FlatMapIterableSubscriber<>(subscriber, mapper, prefetch));
}

static final class FlatMapIterableSubscriber<T, R>
extends AtomicInteger
implements Flow.Subscriber<T>, Flow.Subscription {

private final Flow.Subscriber<? super R> downstream;

private final Function<? super T, ? extends Iterable<? extends R>> mapper;

private final int prefetch;

private final AtomicLong requested;

private final ConcurrentLinkedQueue<T> queue;

private Flow.Subscription upstream;

private long emitted;

private volatile boolean upstreamDone;
private Throwable error;

private volatile boolean canceled;

private Iterator<? extends R> currentIterator;

private int upstreamConsumed;

FlatMapIterableSubscriber(Flow.Subscriber<? super R> downstream,
Function<? super T, ? extends Iterable<? extends R>> mapper,
int prefetch) {
this.downstream = downstream;
this.mapper = mapper;
this.prefetch = prefetch;
this.requested = new AtomicLong();
this.queue = new ConcurrentLinkedQueue<>();
}

@Override
public void onSubscribe(Flow.Subscription subscription) {
SubscriptionHelper.validate(upstream, subscription);
upstream = subscription;
downstream.onSubscribe(this);
subscription.request(prefetch);
}

@Override
public void onNext(T item) {
queue.offer(item);
drain();
}

@Override
public void onError(Throwable throwable) {
error = throwable;
upstreamDone = true;
drain();
}

@Override
public void onComplete() {
upstreamDone = true;
drain();
}

@Override
public void request(long n) {
if (n <= 0L) {
onError(new IllegalArgumentException("Rule §3.9 violated: non-positive requests are forbidden!"));
} else {
SubscriptionHelper.addRequest(requested, n);
drain();
}
}

@Override
public void cancel() {
canceled = true;
upstream.cancel();
drain();
}

void drain() {
if (getAndIncrement() != 0) {
return;
}

Iterator<? extends R> iterator = currentIterator;
Flow.Subscriber<? super R> downstream = this.downstream;
long e = emitted;
int limit = prefetch - (prefetch >> 2);

int missed = 1;
outer:
for (;;) {

if (canceled) {
iterator = null;
currentIterator = null;
queue.clear();
} else {
if (upstreamDone) {
Throwable ex = error;
if (ex != null) {
canceled = true;
downstream.onError(ex);
continue;
}
}
if (iterator == null) {
boolean d = upstreamDone;
T item = queue.poll();
boolean empty = item == null;

if (d && empty) {
canceled = true;
downstream.onComplete();
continue;

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 20, 2020

what is the purpose to continue there if the onComplete has already sent?

This comment has been minimized.

Copy link
@akarnokd

akarnokd Mar 20, 2020

Author Collaborator

Yeah, not much.

}

if (!empty) {

int c = upstreamConsumed + 1;
if (c == limit) {
upstreamConsumed = 0;
upstream.request(limit);

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 20, 2020

According to the long-standing position of the owners of reactive spec that request must not be called synchronously during onComplete, this is a violation of that spec.

(But not really a problem in my view)

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 20, 2020

It is not a synchronous call. If it is a synchronous one, then upstreamDone should be true (assume we got onComplet, which puts upstreamDone to true and you basically may not call request after that flag anymore (calling makes zero sense here).

This comment has been minimized.

Copy link
@akarnokd

akarnokd Mar 20, 2020

Author Collaborator

It is unlikely as it would require an async downstream request call to trigger the drain loop when the upstream signals an item and then completes while limit = 1. An extra check on upstreamDone can be added here. PR welcome.

This comment has been minimized.

Copy link
@OlegDokuka

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 21, 2020

Why would I work around a bug in the spec?

There are more serious issues with this implementation.

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 21, 2020

@olotenko My point is there is no points to change the spec. Extra boolean check does not bring huge overhead

This comment has been minimized.

Copy link
@viktorklang

viktorklang Mar 21, 2020

@olotenko @OlegDokuka @akarnokd I think it is highly relevant to see what changes as necessary for this piece of code to become spec compliant. If that turns out to be a major hassle then of course that has more bearing on whether the spec should change or not. But keep in mind, that such a spec change would not provide any extra value for any user of Reactive Streams (the spec is not made to make life easy for implementors, but make it easier for the users of Reactive Streams—to know that it will just work.

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 21, 2020

The highly relevant bit is how much effort it takes to detect such cases.

What happens if this goes undetected? Is the cost of this happening higher than the cost of the hunt.

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 21, 2020

@viktorklang it is just 3 lines of changes to which I have already pointed. What is necessary to do is to add an extra boolean flag check

if (c == limit) { 

becomes

if (c == limit && !upstreamDone) {

(it can be even more optimized -> instead of volatile upstreamDone check we can reuse already read the d state from the same volatile field a couple of lines before

and
1673ddf#diff-0f519ea307142812e3a65e47f248e24cR170

becomes return

and

1673ddf#diff-0f519ea307142812e3a65e47f248e24cR159

becomes

return

Hooray. we fixed it

This comment has been minimized.

Copy link
@akarnokd

akarnokd Mar 21, 2020

Author Collaborator

L159 has to stay continue because the upstream may fail while there is an iterator currently backpressured and thus both the queue and the reference have to be cleared.

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 21, 2020

Right. Agree. But in any case, we can easily fix the current implementation in order to prevent unwanted interaction with a subscription

This comment has been minimized.

Copy link
@viktorklang

viktorklang Mar 21, 2020

@olotenko Didn't the TCK catch it in your case? And IIRC @akarnokd also submitted an improvement to the TCK for this case, didn't he? In this case, @olotenko's suggestion does not add a single LoC, and only results in an additional boolean check in a single if-statement, this seems like one of the most trivial changes I can imagine, so changing the spec does definitely not be warranted on the merits of implementation simplification. And changing the spec does not improve the TCK either, as an eventually check is impossible without mandating a concrete time window, right?

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 21, 2020

@akarnokd yes. There are more issues than that.

@OlegDokuka No. The test didn't pick it up, @akarnokd wasn't looking, I was casually looking generally what has been committed. The issue has slipped, as far as I can tell.

@viktorklang I don't know why we are discussing it here. No, this issue that this discussion is about, is not what TCK has picked up.

Now justify the cost of the hunt for such non-compliance anywhere else I haven't looked at yet.

This comment has been minimized.

Copy link
@OlegDokuka

OlegDokuka Mar 21, 2020

I ran this discussion because of the link in the corresponding reactive-streams spec issue. I briefly looked into the source and found a couple of missed checks which is going to prevent the specification violation.

Now justify the cost of the hunt for such non-compliance anywhere else I haven't looked at yet.

I guess we can say that if we are not going to follow the specification at all - there will be no issues to hunt.

As I said before, if there is a problem in TCK, let's fix them, no need to change the spec.

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 21, 2020

@OlegDokuka That's digression. What's the cost of breaking that rule? My take: zero. Nothing to hunt.

Can anyone demonstrate the cost of breaking that rule is non-zero?

This comment has been minimized.

Copy link
@viktorklang

viktorklang Mar 21, 2020

@olotenko The TCK should help shorten that hunt down, that is the purpose of it. Did you find its error message inadequate, and if so, which modification of it would have helped you even more?

Demonstration of the cost of breaking the rule: any Subscriper implementation which uses the knowledge provided by that rule is broken. Since you are proposing a breaking change of the spec, without any clear user or implementor gain, as it stands right now, the price to pay for breaking the spec is not justified for its gain.

Realistically the burden of evidence is not on the project, but on the party which proposes the change, for natural reasons.

I’m glad there were simple modifications to amend your implementation to be compliant, now if you don’t mind, I’ll have to get back to spending time with my family.

Cheers,
V

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 21, 2020

I am not sure which thing in that statement is the demonstration of the cost. It amounts to "the cost of breaking the rule is the broken rule".

Since you are proposing a breaking change...

No one demonstrated what breaks.

This comment has been minimized.

Copy link
@viktorklang

viktorklang Mar 21, 2020

No one demonstrated what breaks.

Yes, we did.

Demonstration of the cost of breaking the rule: any Subscriber implementation which uses the knowledge provided by that rule is broken.<<<

Clearly we are not aware of all implementations of Subscription, nor should we, but the purpose of the specification is to establish the ground rules, so that implementations know what they can assume. Your proposed change removes an established assumption founded in the specification—hence a breaking change.
Since the value gained by the change is miniscule: for implementors, and none: for users, it is not justifiable IMO.

This comment has been minimized.

Copy link
@olotenko

olotenko Mar 22, 2020

A Subscriber cannot benefit from knowing that rule, so I presume you meant Publisher. The challenge ("what breaks") I posed, and which wasn't addressed, is to present such a Publisher - the Publisher that meets the modern spec, and can both handle request concurrent with onComplete, and not handle synchronous request.

Anyway. This discussion really belongs to the reactive-streams issue track, and these arguments, both yours and mine, were already sounded there. I only posted a reference to the code contributed by others as an argument for the reality of having to hunt the issues. I will hunt for issues that are relevant. I won't hunt for issues that are applicable to obsolete requirements.

The value of the change is to not waste time looking for issues that are no-op. My theory about no one crying with pain, is because no one is looking at all; they mostly just make sure TCK passes, and then wait for issues to surface later.

} else {
upstreamConsumed = c;
}

boolean hasNext;
try {
iterator = Objects.requireNonNull(
mapper.apply(item).iterator(),
"The Iterable returned a null iterator"
);

if (canceled) {
continue;
}

hasNext = iterator.hasNext();
} catch (Throwable ex) {
canceled = true;
downstream.onError(ex);
continue;
}

if (!hasNext) {
iterator = null;
continue;
}
currentIterator = iterator;
}
}

if (iterator != null) {
long r = requested.get();

while (e != r) {

if (canceled) {
continue outer;
}

R result;

try {
result = Objects.requireNonNull(iterator.next(),
"The iterator returned a null item");
} catch (Throwable ex) {
canceled = true;
downstream.onError(ex);
continue outer;
}

if (canceled) {
continue outer;
}

downstream.onNext(result);
e++;

if (canceled) {
continue outer;
}

boolean hasNext;
try {
hasNext = iterator.hasNext();
} catch (Throwable ex) {
canceled = true;
downstream.onError(ex);
continue outer;
}

if (canceled) {
continue outer;
}

if (!hasNext) {
iterator = null;
currentIterator = null;
continue outer;
}
}
}
}

emitted = e;
missed = addAndGet(-missed);
if (missed == 0) {
break;
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.helidon.common.reactive;

import org.reactivestreams.tck.TestEnvironment;
import org.reactivestreams.tck.flow.FlowPublisherVerification;

import java.util.Collections;
import java.util.concurrent.Flow;
import java.util.stream.IntStream;

public class MultiFlatMapIterableManyToManyTckTest extends FlowPublisherVerification<Integer> {

public MultiFlatMapIterableManyToManyTckTest() {
super(new TestEnvironment(50));
}

@Override
public Flow.Publisher<Integer> createFlowPublisher(long l) {
return Multi.just(l >> 1, l - (l >> 1))
.flatMapIterable(v -> () -> IntStream.range(0, v.intValue()).boxed().iterator());
}

@Override
public Flow.Publisher<Integer> createFailedFlowPublisher() {
return null;
}

@Override
public long maxElementsFromPublisher() {
return 10;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.helidon.common.reactive;

import org.reactivestreams.tck.TestEnvironment;
import org.reactivestreams.tck.flow.FlowPublisherVerification;

import java.util.Collections;
import java.util.concurrent.Flow;
import java.util.stream.IntStream;

public class MultiFlatMapIterableManyToOneTckTest extends FlowPublisherVerification<Integer> {

public MultiFlatMapIterableManyToOneTckTest() {
super(new TestEnvironment(50));
}

@Override
public Flow.Publisher<Integer> createFlowPublisher(long l) {
return Multi.from(() -> IntStream.range(0, (int)l).boxed().iterator())
.flatMapIterable(Collections::singleton);
}

@Override
public Flow.Publisher<Integer> createFailedFlowPublisher() {
return null;
}

@Override
public long maxElementsFromPublisher() {
return 10;
}
}
Loading

0 comments on commit 1673ddf

Please sign in to comment.