Fix weird NRT bug apache#13353 (apache#13369)
The issue outlines the problem: when a segment has point-valued dimensions, segment core readers assume that point files will be present.

However, when soft deletes are enabled and a document fails indexing before a point field could be written, this assumption breaks and the NRT reader fails to open. I settled on always flushing a point file if the field info says there are point fields, even if there aren't any docs in the buffer.
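To make the assumption concrete, here is a minimal sketch paraphrasing the reader-side open path; names like coreFieldInfos, codec, and segmentReadState stand in for the surrounding SegmentCoreReaders state and are assumptions, not quotes from the source:

// Sketch: when a segment's FieldInfos declare point values, the codec's
// points reader is opened unconditionally and expects point files on disk.
if (coreFieldInfos.hasPointValues()) {
  pointsReader = codec.pointsFormat().fieldsReader(segmentReadState);
}
// If indexing failed before any point value was flushed, the point files are
// missing, the open above throws, and the NRT reader cannot be created.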

closes apache#13353
benwtrent authored May 15, 2024
1 parent 46f1f95 commit b1d3c08
Showing 3 changed files with 37 additions and 1 deletion.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
@@ -367,6 +367,9 @@ Bug Fixes

* GITHUB#13366: Disallow NaN and Inf values in scalar quantization and better handle extreme cases. (Ben Trent)

* GITHUB#13369: Fix NRT opening failure when soft deletes are enabled and the document fails to index before a point
field is written. (Ben Trent)

Build
---------------------

@@ -375,7 +375,7 @@ private void writePoints(SegmentWriteState state, Sorter.DocMap sortMap) throws
while (perField != null) {
if (perField.pointValuesWriter != null) {
// We could have initialized pointValuesWriter, but failed to write even a single doc
- if (perField.pointValuesWriter.getNumDocs() > 0) {
+ if (perField.fieldInfo.getPointDimensionCount() > 0) {
if (pointsWriter == null) {
// lazy init
PointsFormat fmt = state.segmentInfo.getCodec().pointsFormat();
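Read in context, the change turns the flush condition from "did we buffer any point docs?" into "does the schema declare point dimensions?". A condensed sketch of the resulting writePoints flow, with the loop shape and error handling simplified and allFields() a hypothetical stand-in for the per-field hash iteration:

private void writePoints(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
  PointsWriter pointsWriter = null;
  for (PerField perField : allFields()) { // hypothetical iteration helper
    if (perField.pointValuesWriter != null
        // flush based on the declared schema, not on buffered docs: a failed
        // document can register point dimensions without writing any value
        && perField.fieldInfo.getPointDimensionCount() > 0) {
      if (pointsWriter == null) {
        // lazy init
        pointsWriter = state.segmentInfo.getCodec().pointsFormat().fieldsWriter(state);
      }
      perField.pointValuesWriter.flush(state, sortMap, pointsWriter);
    }
  }
  if (pointsWriter != null) {
    pointsWriter.finish(); // a (possibly empty) points file now always exists
  }
}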
@@ -47,6 +47,7 @@
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteBuffersDirectory;
@@ -2309,4 +2310,36 @@ && callStackContains(IndexFileDeleter.class, "deleteFiles")) {
DirectoryReader.open(dir).close();
}
}

public void testExceptionJustBeforeFlushWithPointValues() throws Exception {
Directory dir = newDirectory();
Analyzer analyzer =
new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
@Override
public TokenStreamComponents createComponents(String fieldName) {
MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(
false); // disable workflow checking as we forcefully close() in exceptional cases.
TokenStream stream = new CrashingFilter(fieldName, tokenizer);
return new TokenStreamComponents(tokenizer, stream);
}
};
IndexWriterConfig iwc =
newIndexWriterConfig(analyzer).setCommitOnClose(false).setMaxBufferedDocs(3);
MergePolicy mp = iwc.getMergePolicy();
iwc.setMergePolicy(
new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, mp));
IndexWriter w = RandomIndexWriter.mockIndexWriter(dir, iwc, random());
Document newdoc = new Document();
newdoc.add(newTextField("crash", "do it on token 4", Field.Store.NO));
newdoc.add(new IntPoint("int", 42));
expectThrows(IOException.class, () -> w.addDocument(newdoc));
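// before the fix, this NRT open failed: the segment's FieldInfos declare a
// point field, but no points file had been flushed for the failed document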
DirectoryReader r = w.getReader(false, false);
LeafReader onlyReader = getOnlyLeafReader(r);
// we mark the failed doc as deleted
assertEquals(onlyReader.numDeletedDocs(), 1);
onlyReader.close();
w.close();
dir.close();
}
}
