From 82f3f06706f805b51507edf6c0959af39319bbef Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Tue, 12 Dec 2023 20:26:02 +0800 Subject: [PATCH] Push and pop OutputAccumulator as IntersectTermsEnumFrames are pushed and popped (#12900) --- .../TestBackwardsCompatibility.java | 2 -- .../blocktree/IntersectTermsEnum.java | 9 +++++-- .../blocktree/IntersectTermsEnumFrame.java | 2 ++ .../lucene90/blocktree/SegmentTermsEnum.java | 27 ++++++++++++++----- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index bde6090d45b3..cc3ca818ffe8 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -128,7 +128,6 @@ import org.apache.lucene.util.Version; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Ignore; /* Verify we can read previous versions' indexes, do searches @@ -2273,7 +2272,6 @@ public void testFailOpenOldIndex() throws IOException { // #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset // of wikibigall unique terms) that shows the read-time exception of // IntersectTermsEnum (used by WildcardQuery) - @Ignore("re-enable once we merge #12900") public void testWildcardQueryExceptions990() throws IOException { Path path = createTempDir("12895"); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java index 65190871e501..2346ae892a35 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java @@ -120,6 +120,8 @@ public IntersectTermsEnum( assert setSavedStartTerm(startTerm); currentFrame = f; + outputAccumulator.push(currentFrame.arc.output()); + if (startTerm != null) { seekToStartTerm(startTerm); } @@ -184,8 +186,7 @@ private IntersectTermsEnumFrame pushFrame(int state) throws IOException { int idx = currentFrame.prefix; assert currentFrame.suffix > 0; - outputAccumulator.reset(); - outputAccumulator.push(arc.output()); + int initOutputCount = outputAccumulator.outputCount(); while (idx < f.prefix) { final int target = term.bytes[idx] & 0xff; // TODO: we could be more efficient for the next() @@ -198,9 +199,11 @@ private IntersectTermsEnumFrame pushFrame(int state) throws IOException { } f.arc = arc; + f.outputNum = outputAccumulator.outputCount() - initOutputCount; assert arc.isFinal(); outputAccumulator.push(arc.nextFinalOutput()); f.load(outputAccumulator); + outputAccumulator.pop(arc.nextFinalOutput()); return f; } @@ -343,6 +346,7 @@ private boolean popPushNext() throws IOException { throw NoMoreTermsException.INSTANCE; } final long lastFP = currentFrame.fpOrig; + outputAccumulator.pop(currentFrame.outputNum); currentFrame = stack[currentFrame.ord - 1]; currentTransition = currentFrame.transition; assert currentFrame.lastSubFP == lastFP; @@ -429,6 +433,7 @@ private BytesRef _next() throws IOException { currentFrame = null; return null; } + outputAccumulator.pop(currentFrame.outputNum); currentFrame = stack[currentFrame.ord - 1]; currentTransition = currentFrame.transition; isSubBlock = popPushNext(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java index 2b0e05a0b09c..9f6bb75788e2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnumFrame.java @@ -89,6 +89,8 @@ final class IntersectTermsEnumFrame { final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + int outputNum; + int startBytePos; int suffix; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java index 30a4529c5da0..f8e3c50bcb2a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java @@ -495,7 +495,7 @@ public boolean seekExact(BytesRef target) throws IOException { targetUpto = 0; outputAccumulator.push(arc.nextFinalOutput()); currentFrame = pushFrame(arc, 0); - outputAccumulator.pop(); + outputAccumulator.pop(arc.nextFinalOutput()); } // if (DEBUG) { @@ -569,7 +569,7 @@ public boolean seekExact(BytesRef target) throws IOException { // if (DEBUG) System.out.println(" arc is final!"); outputAccumulator.push(arc.nextFinalOutput()); currentFrame = pushFrame(arc, targetUpto); - outputAccumulator.pop(); + outputAccumulator.pop(arc.nextFinalOutput()); // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + // currentFrame.hasTerms); } @@ -767,7 +767,7 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { targetUpto = 0; outputAccumulator.push(arc.nextFinalOutput()); currentFrame = pushFrame(arc, 0); - outputAccumulator.pop(); + outputAccumulator.pop(arc.nextFinalOutput()); } // if (DEBUG) { @@ -841,7 +841,7 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { // if (DEBUG) System.out.println(" arc is final!"); outputAccumulator.push(arc.nextFinalOutput()); currentFrame = pushFrame(arc, targetUpto); - outputAccumulator.pop(); + outputAccumulator.pop(arc.nextFinalOutput()); // if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + // currentFrame.hasTerms); } @@ -1187,14 +1187,27 @@ static class OutputAccumulator extends DataInput { void push(BytesRef output) { if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) { + assert output.length > 0; outputs = ArrayUtil.grow(outputs, num + 1); outputs[num++] = output; } } - void pop() { - assert num > 0; - num--; + void pop(BytesRef output) { + if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) { + assert num > 0; + assert outputs[num - 1] == output; + num--; + } + } + + void pop(int cnt) { + assert num >= cnt; + num -= cnt; + } + + int outputCount() { + return num; } void reset() {