From 9ec160144afff462d6f042fb679f1d72f4609723 Mon Sep 17 00:00:00 2001 From: Tamas Cservenak Date: Thu, 24 Nov 2022 09:05:56 +0100 Subject: [PATCH 1/2] [MINDEXER-176] Clean up silos The MT updater did not clean up silos after it was done. --- https://issues.apache.org/jira/browse/MINDEXER-176 --- .../maven/index/updater/IndexDataReader.java | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java index aac2ef4e..3e2139ef 100644 --- a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java +++ b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java @@ -23,6 +23,7 @@ import java.io.DataInput; import java.io.DataInputStream; import java.io.EOFException; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.UTFDataFormatException; @@ -171,11 +172,14 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context ExecutorService executorService = Executors.newFixedThreadPool( threads ); ArrayList errors = new ArrayList<>(); - ArrayList silos = new ArrayList<>( threads ); + ArrayList siloDirectories = new ArrayList<>( threads ); + ArrayList siloWriters = new ArrayList<>( threads ); for ( int i = 0; i < threads; i++ ) { final int silo = i; - silos.add( tempWriter( "silo" + i ) ); + FSDirectory siloDirectory = tempDirectory( "silo" + i ); + siloDirectories.add( siloDirectory ); + siloWriters.add( tempWriter( siloDirectory ) ); executorService.execute( () -> { LOGGER.debug( "Starting thread {}", Thread.currentThread().getName() ); @@ -190,7 +194,7 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context { break; } - addToIndex( doc, context, silos.get( silo ), rootGroups, allGroups ); + addToIndex( doc, context, siloWriters.get( silo ), rootGroups, allGroups ); } 
catch ( InterruptedException | IOException e ) { @@ -245,10 +249,18 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context } LOGGER.debug( "Merging silos..." ); - for ( IndexWriter silo : silos ) + for ( IndexWriter siloWriter : siloWriters ) { - IndexUtils.close( silo ); - w.addIndexes( silo.getDirectory() ); + siloWriter.commit(); + siloWriter.close(); + } + LOGGER.debug( "Cleanup of silos..." ); + for ( FSDirectory siloDirectory : siloDirectories ) + { + w.addIndexes( siloDirectory ); + File dir = siloDirectory.getDirectory().toFile(); + siloDirectory.close(); + IndexUtils.delete( dir ); } LOGGER.debug( "Merged silos..." ); @@ -269,11 +281,11 @@ private FSDirectory tempDirectory( final String name ) throws IOException return FSDirectory.open( Files.createTempDirectory( name + ".dir" ) ); } - private IndexWriter tempWriter( final String name ) throws IOException + private IndexWriter tempWriter( final FSDirectory directory ) throws IOException { IndexWriterConfig config = new IndexWriterConfig( new NexusAnalyzer() ); config.setUseCompoundFile( false ); - return new NexusIndexWriter( tempDirectory( name ), config ); + return new NexusIndexWriter( directory, config ); } private void addToIndex( final Document doc, final IndexingContext context, final IndexWriter indexWriter, From dd78ed4eb12f1bdda43214ab9ffde89bdad23106 Mon Sep 17 00:00:00 2001 From: Tamas Cservenak Date: Fri, 25 Nov 2022 14:25:50 +0100 Subject: [PATCH 2/2] Rework logging and regroup a bit: do not call addIndexes from loop when it can be batched. 
--- .../maven/index/updater/IndexDataReader.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java index 3e2139ef..f826a562 100644 --- a/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java +++ b/indexer-core/src/main/java/org/apache/maven/index/updater/IndexDataReader.java @@ -47,6 +47,7 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.maven.index.ArtifactInfo; import org.apache.maven.index.context.IndexUtils; @@ -174,6 +175,7 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context ArrayList errors = new ArrayList<>(); ArrayList siloDirectories = new ArrayList<>( threads ); ArrayList siloWriters = new ArrayList<>( threads ); + LOGGER.debug( "Creating {} silo writer threads...", threads ); for ( int i = 0; i < threads; i++ ) { final int silo = i; @@ -210,6 +212,7 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context } ); } + LOGGER.debug( "Loading up documents into silos" ); try { Document doc; @@ -248,22 +251,25 @@ private IndexDataReadResult readIndexMT( IndexWriter w, IndexingContext context IndexUtils.updateTimestamp( w.getDirectory(), date ); } - LOGGER.debug( "Merging silos..." ); + LOGGER.debug( "Closing silo writers..." ); for ( IndexWriter siloWriter : siloWriters ) { siloWriter.commit(); siloWriter.close(); } - LOGGER.debug( "Cleanup of silos..." ); + + LOGGER.debug( "Merging silo directories..." ); + w.addIndexes( siloDirectories.toArray( new Directory[0] ) ); + + LOGGER.debug( "Cleanup of silo directories..." 
); for ( FSDirectory siloDirectory : siloDirectories ) { - w.addIndexes( siloDirectory ); File dir = siloDirectory.getDirectory().toFile(); siloDirectory.close(); IndexUtils.delete( dir ); } - LOGGER.debug( "Merged silos..." ); + LOGGER.debug( "Finalizing..." ); w.commit(); IndexDataReadResult result = new IndexDataReadResult();