Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test lucene FixedBitSet - Experiment #4384

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package org.opentripplanner.transit.raptor.rangeraptor.multicriteria;

import java.util.BitSet;
import java.util.List;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;
import org.opentripplanner.transit.raptor.api.response.StopArrivals;
import org.opentripplanner.transit.raptor.api.transit.IntIterator;
import org.opentripplanner.transit.raptor.api.transit.RaptorTripSchedule;
Expand All @@ -10,7 +11,7 @@
import org.opentripplanner.transit.raptor.rangeraptor.multicriteria.arrivals.AbstractStopArrival;
import org.opentripplanner.transit.raptor.rangeraptor.path.DestinationArrivalPaths;
import org.opentripplanner.transit.raptor.rangeraptor.transit.EgressPaths;
import org.opentripplanner.transit.raptor.util.BitSetIterator;
import org.opentripplanner.transit.raptor.util.LuceneBitSetIterator;

/**
* This class serve as a wrapper for all stop arrival pareto set, one set for each stop. It also
Expand Down Expand Up @@ -38,7 +39,7 @@ public McStopArrivals(
) {
//noinspection unchecked
this.arrivals = (StopArrivalParetoSet<T>[]) new StopArrivalParetoSet[nStops];
this.touchedStops = new BitSet(nStops);
this.touchedStops = new FixedBitSet(nStops);
this.debugHandlerFactory = debugHandlerFactory;
this.debugStats = new DebugStopArrivalsStatistics(debugHandlerFactory.debugLogger());

Expand Down Expand Up @@ -85,11 +86,11 @@ public int smallestNumberOfTransfers(int stopIndex) {
}

boolean updateExist() {
return !touchedStops.isEmpty();
return touchedStops.cardinality() > 0;
}

IntIterator stopsTouchedIterator() {
return new BitSetIterator(touchedStops);
return new LuceneBitSetIterator(touchedStops);
}

void addStopArrival(AbstractStopArrival<T> arrival) {
Expand Down Expand Up @@ -117,7 +118,7 @@ void clearTouchedStopsAndSetStopMarkers() {
while (it.hasNext()) {
arrivals[it.next()].markAtEndOfSet();
}
touchedStops.clear();
touchedStops.clear(0, touchedStops.length());
}

/* private methods */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.opentripplanner.transit.raptor.rangeraptor.standard.internalapi.ArrivedAtDestinationCheck;
import org.opentripplanner.transit.raptor.rangeraptor.standard.internalapi.StopArrivalsState;
import org.opentripplanner.transit.raptor.rangeraptor.transit.TransitCalculator;
import org.opentripplanner.transit.raptor.util.BitSetIterator;
import org.opentripplanner.transit.raptor.util.LuceneBitSetIterator;

/**
* Tracks the state of a standard Range Raptor search, specifically the best arrival times at each
Expand Down Expand Up @@ -81,7 +81,7 @@ public IntIterator stopsTouchedPreviousRound() {
}

@Override
public BitSetIterator stopsTouchedByTransitCurrentRound() {
public LuceneBitSetIterator stopsTouchedByTransitCurrentRound() {
return bestTimes.reachedByTransitCurrentRound();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import static org.opentripplanner.transit.raptor.util.IntUtils.intArray;

import java.util.BitSet;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;
import org.opentripplanner.transit.raptor.rangeraptor.internalapi.WorkerLifeCycle;
import org.opentripplanner.transit.raptor.rangeraptor.transit.TransitCalculator;
import org.opentripplanner.transit.raptor.util.BitSetIterator;
import org.opentripplanner.transit.raptor.util.LuceneBitSetIterator;
import org.opentripplanner.util.lang.ToStringBuilder;

/**
Expand Down Expand Up @@ -43,11 +44,11 @@ public final class BestTimes {
public BestTimes(int nStops, TransitCalculator<?> calculator, WorkerLifeCycle lifeCycle) {
this.calculator = calculator;
this.times = intArray(nStops, calculator.unreachedTime());
this.reachedCurrentRound = new BitSet(nStops);
this.reachedLastRound = new BitSet(nStops);
this.reachedCurrentRound = new FixedBitSet(nStops);
this.reachedLastRound = new FixedBitSet(nStops);

this.transitArrivalTimes = intArray(nStops, calculator.unreachedTime());
this.reachedByTransitCurrentRound = new BitSet(nStops);
this.reachedByTransitCurrentRound = new FixedBitSet(nStops);

// Attach to Worker life cycle
lifeCycle.onSetupIteration(ignore -> setupIteration());
Expand All @@ -66,21 +67,21 @@ public int transitArrivalTime(int stop) {
* @return true if at least one stop arrival was reached last round (best overall).
*/
public boolean isCurrentRoundUpdated() {
return !reachedCurrentRound.isEmpty();
return reachedCurrentRound.cardinality() > 0;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is slow, but should not have a huge impact. We can look for the first bit set or keep a flag in the BestTimes to improve this.

}

/**
* @return an iterator for all stops reached (overall best) in the last round.
*/
public BitSetIterator stopsReachedLastRound() {
return new BitSetIterator(reachedLastRound);
public LuceneBitSetIterator stopsReachedLastRound() {
return new LuceneBitSetIterator(reachedLastRound);
}

/**
* @return an iterator of all stops reached on-board in the current round.
*/
public BitSetIterator reachedByTransitCurrentRound() {
return new BitSetIterator(reachedByTransitCurrentRound);
public LuceneBitSetIterator reachedByTransitCurrentRound() {
return new LuceneBitSetIterator(reachedByTransitCurrentRound);
}

/**
Expand Down Expand Up @@ -137,7 +138,7 @@ public String toString() {
.of(BestTimes.class)
.addIntArraySize("times", times, unreachedTime)
.addIntArraySize("transitArrivalTimes", transitArrivalTimes, unreachedTime)
.addNum("reachedCurrentRound", reachedCurrentRound.size())
.addNum("reachedCurrentRound", reachedCurrentRound.length())
.addBitSetSize("reachedByTransitCurrentRound", reachedByTransitCurrentRound)
.addBitSetSize("reachedLastRound", reachedLastRound)
.toString();
Expand All @@ -156,17 +157,17 @@ boolean isStopReachedOnBoardInCurrentRound(int stop) {
*/
private void setupIteration() {
// clear all touched stops to avoid constant reëxploration
reachedCurrentRound.clear();
reachedByTransitCurrentRound.clear();
reachedCurrentRound.clear(0, reachedCurrentRound.length());
reachedByTransitCurrentRound.clear(0, reachedByTransitCurrentRound.length());
}

/**
* Prepare this class for the next round updating reached flags.
*/
private void prepareForNextRound() {
swapReachedCurrentAndLastRound();
reachedCurrentRound.clear();
reachedByTransitCurrentRound.clear();
reachedCurrentRound.clear(0, reachedCurrentRound.length());
reachedByTransitCurrentRound.clear(0, reachedCurrentRound.length());
}

/* private methods */
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.opentripplanner.transit.raptor.util;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.opentripplanner.transit.raptor.api.transit.IntIterator;

/**
* TODO TGR
*/
public final class LuceneBitSetIterator implements IntIterator {

private final BitSet set;
private int nextIndex;

public LuceneBitSetIterator(BitSet set) {
this.set = set;
this.nextIndex = set.nextSetBit(nextIndex);
}

@Override
public int next() {
int index = nextIndex;
nextIndex =
index == set.length() - 1 ? DocIdSetIterator.NO_MORE_DOCS : set.nextSetBit(index + 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you check the performance with SparseFixedBitSet, In that case the nextSetBit could be faster, since it checks each 4096-bit block only if at least one value is set, and it knows directly in which 64-bit segment. the regular FixedBitSet needs to check each block. It might be that linear scanning is still faster, as most of the data is already in the cache.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to be better than org.apache.lucene.util.FixedBitSet, but still worse than java.util.BitSet

java.util.BitSet


Worker: 
 ==> mc_destination : [  464,  441,  440,  444 ] Avg: 447.3

Total:  
 ==> mc_destination : [  772,  705,  709,  711 ] Avg: 724.3


org.apache.lucene.util.FixedBitSet


Worker: 
 ==> mc_destination : [  480,  459,  465,  466 ] Avg: 467.5

Total:  
 ==> mc_destination : [  779,  741,  750,  750 ] Avg: 755.0


org.apache.lucene.util.SparseFixedBitSet


Worker: 
 ==> mc_destination : [  480,  447,  449,  452 ] Avg: 457.0

Total:  
 ==> mc_destination : [  788,  726,  722,  720 ] Avg: 739.0

return index;
}

@Override
public boolean hasNext() {
return nextIndex != DocIdSetIterator.NO_MORE_DOCS;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,15 @@ public ToStringBuilder addBitSetSize(String name, BitSet bitSet) {
return addIt(name, bitSet.cardinality() + "/" + bitSet.length());
}

/** Add the Lucene BitSet: name : {cardinality}/{logical size}/{size} */
public ToStringBuilder addBitSetSize(String name, org.apache.lucene.util.BitSet bitSet) {
if (bitSet == null) {
return this;
}
// TODO Lucene BitSet length() has a different meaning
return addIt(name, bitSet.cardinality() + "/" + bitSet.length());
}

/* Special purpose formatters */

/** Add a Coordinate location, longitude or latitude */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ public class RaptorArchitectureTest {
private static final Package RR_STD_CONFIGURE = RR_STANDARD.subPackage("configure");
private static final Package RR_CONTEXT = RANGE_RAPTOR.subPackage("context");

private static final Package LUCENE_BITSET = Package.of("org.apache.lucene.util");

@Test
void enforcePackageDependenciesRaptorAPI() {
var api = RAPTOR.subPackage("api");
Expand All @@ -42,7 +44,7 @@ void enforcePackageDependenciesRaptorAPI() {

@Test
void enforcePackageDependenciesUtil() {
RAPTOR_UTIL.dependsOn(UTILS, RAPTOR_API).verify();
RAPTOR_UTIL.dependsOn(UTILS, RAPTOR_API, LUCENE_BITSET).verify();
RAPTOR_UTIL_PARETO_SET.verify();
}

Expand Down Expand Up @@ -70,7 +72,7 @@ void enforcePackageDependenciesInRaptorImplementation() {
var stdInternalApi = RR_STANDARD.subPackage("internalapi").dependsOn(RAPTOR_API).verify();
var stdBestTimes = RR_STANDARD
.subPackage("besttimes")
.dependsOn(rrCommon, stdInternalApi)
.dependsOn(rrCommon, stdInternalApi, LUCENE_BITSET)
.verify();
var stdStopArrivals = RR_STANDARD
.subPackage("stoparrivals")
Expand Down Expand Up @@ -118,7 +120,7 @@ void enforcePackageDependenciesInRaptorImplementation() {
.subPackage("heuristic")
.dependsOn(rrCommon, mcArrivals)
.verify();
RR_MULTI_CRITERIA.dependsOn(rrCommon, mcArrivals, mcHeuristics).verify();
RR_MULTI_CRITERIA.dependsOn(rrCommon, mcArrivals, mcHeuristics, LUCENE_BITSET).verify();

RR_MC_CONFIGURE
.dependsOn(rrCommon, RR_CONTEXT, pathConfigure, mcHeuristics, RR_MULTI_CRITERIA)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,10 @@ private void runSingleTest(int sample, int nSamples) {
// We assume we are debugging and not measuring performance if we only run 1 test-case
// one time; Hence skip JIT compiler warm-up.
int samplesPrProfile = opts.numberOfTestsSamplesToRun() / opts.profiles().length;
if (testCases.size() > 1 || samplesPrProfile > 1) {
if (testCases.size() > 1 && samplesPrProfile > 1) {
// Warm-up JIT compiler, run the second test-case if it exist to avoid the same
// test case from being repeated. If there is just one case, then run it.
int index = testCases.size() == 1 ? 0 : 1;
runSingleTestCase(testCases.get(index), true);
runSingleTestCase(testCases.get(1), true);
}

ResultPrinter.logSingleTestHeader(routeProfile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public void writeResultsToFile(List<TestCase> testCases) {
if (!tcIds.isEmpty()) {
LOG.warn(
"No results file written, at least one test-case is not run or returned without any result!" +
" Test-Cases: " +
" Failed Test-Cases: " +
tcIds
);
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ public void finishUp() {

public int totalTimerMean(String timerName) {
long count = getTotalTimers(timerName).mapToLong(Timer::count).sum();
if (count == 0) {
return 0;
}
return (int) (testTotalTimeMs(timerName) / count);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.opentripplanner.transit.raptor.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;
import org.junit.jupiter.api.Test;

public class LuceneBitSetIteratorTest {

@Test
public void test() {
LuceneBitSetIterator it;
BitSet set = new FixedBitSet(6);

// Empty set does not have any elements
it = new LuceneBitSetIterator(set);
assertFalse(it.hasNext());

set.set(2);
it = new LuceneBitSetIterator(set);
assertTrue(it.hasNext());
assertEquals(2, it.next());
assertFalse(it.hasNext());

// set: [0, 2, 5], 2 is added above
set.set(0);
set.set(5);
it = new LuceneBitSetIterator(set);
assertTrue(it.hasNext());
assertEquals(0, it.next());
assertTrue(it.hasNext());
assertEquals(2, it.next());
assertTrue(it.hasNext());
assertEquals(5, it.next());
assertFalse(it.hasNext());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ public class UtilArchitectureTest {
private static final Package LANG = UTIL.subPackage("lang");
private static final Package TIME = UTIL.subPackage("time");

private static final Package LUCENE_BITSET = Package.of("org.apache.lucene.util");

@Test
void enforcePackageDependencies() {
// The util packages needs cleanup, it contains model, framework and API classes. The strategy
Expand All @@ -21,6 +23,6 @@ void enforcePackageDependencies() {
// It might sound strange that lang depend on time, but we allow this to avoid creating another
// util package where we can put the ToStringBuilder classes(witch depend on time). As long as
// we do not get cyclic dependencies between lang and time there is not problem with this.
LANG.dependsOn(TIME).verify();
LANG.dependsOn(TIME, LUCENE_BITSET).verify();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ public void addBitSetSize() {
subject().addBitSetSize("bitSet", bset).toString()
);

assertEquals("ToStringBuilderTest{}", subject().addBitSetSize("bitSet", null).toString());
assertEquals(
"ToStringBuilderTest{}",
subject().addBitSetSize("bitSet", (BitSet) null).toString()
);
}

@Test
Expand Down