diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java index 65190871e50..d8ce455fdc8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java @@ -19,6 +19,8 @@ package org.apache.lucene.codecs.lucene90.blocktree; import java.io.IOException; import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.util.fst.ByteSequenceOutputs; +import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; @@ -44,7 +46,8 @@ final class IntersectTermsEnum extends BaseTermsEnum { // static boolean DEBUG = BlockTreeTermsWriter.DEBUG; final IndexInput in; - + static final Outputs fstOutputs = ByteSequenceOutputs.getSingleton(); + IntersectTermsEnumFrame[] stack; @SuppressWarnings({"rawtypes", "unchecked"}) @@ -114,6 +117,7 @@ final class IntersectTermsEnum extends BaseTermsEnum { f.prefix = 0; f.setState(0); f.arc = arc; + f.outputPrefix = arc.output(); f.load(fr.rootCode); // for assert: @@ -183,24 +187,47 @@ final class IntersectTermsEnum extends BaseTermsEnum { FST.Arc arc = currentFrame.arc; int idx = currentFrame.prefix; assert currentFrame.suffix > 0; - + BytesRef output = currentFrame.outputPrefix; + outputAccumulator.reset(); + /* + System.out.println("push frame current stack:"); + for(int xx=0;xx<=currentFrame.ord;xx++) { + System.out.println(" ord=" + xx + " label=" + (char) stack[xx].arc.label() + " arc.output=" + stack[xx].arc.output() + " arc.nextFinalOutput=" + stack[xx].arc.nextFinalOutput() + " outputPrefix=" + stack[xx].outputPrefix); + } + */ + outputAccumulator.push(arc.output()); + // TODO: this fixes it? preserve full arc output history + //outputAccumulator.push(currentFrame.outputPrefix); + while (idx < f.prefix) { final int target = term.bytes[idx] & 0xff; // TODO: we could be more efficient for the next() // case by using current arc as starting point, // passed to findTargetArc + //System.out.println(" follow letter=" + (char) target); arc = fr.index.findTargetArc(target, arc, getArc(1 + idx), fstReader); assert arc != null; outputAccumulator.push(arc.output()); + output = fstOutputs.add(output, arc.output()); idx++; } f.arc = arc; + f.outputPrefix = output; assert arc.isFinal(); + //System.out.println(" arc.nextFinalOutput()=" + arc.nextFinalOutput()); outputAccumulator.push(arc.nextFinalOutput()); - f.load(outputAccumulator); + BytesRef oldWay = fstOutputs.add(output, arc.nextFinalOutput()); + //System.out.println(" old way: " + oldWay); + + BytesRef newWay = outputAccumulator.toBytesRef(); + if (newWay.equals(oldWay) == false) { + System.out.println("DIFF: oldWay=" + oldWay + " newWay=" + newWay); + } + //f.load(outputAccumulator); + f.load(oldWay); return f; } @@ -370,7 +397,9 @@ final class IntersectTermsEnum extends BaseTermsEnum { @Override public BytesRef next() throws IOException { try { - return _next(); + BytesRef nextTerm = _next(); + //System.out.println("return term=" + nextTerm.utf8ToString() + "\n"); + return nextTerm; } catch ( @SuppressWarnings("unused") NoMoreTermsException eoi) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java index 2b0e05a0b09..651e75ca055 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java @@ -89,6 +89,9 @@ final class IntersectTermsEnumFrame { final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + // Cumulative output so far + BytesRef outputPrefix; + int startBytePos; int suffix; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java index 30a4529c5da..c1080da83c2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java @@ -1207,6 +1207,21 @@ final class SegmentTermsEnum extends BaseTermsEnum { current = outputs[0]; } + // nocommit temp debugging code: + BytesRef toBytesRef() { + int byteCount = 0; + for(int i=0; i