Skip to content

Commit

Permalink
use SMO service for class name and SHA1 queries.
Browse files Browse the repository at this point in the history
 - the remote index does not contain class name information anymore
 - sha1 queries are rarely used and contribute to index size,
   which makes them a good candidate for SMO too
  • Loading branch information
mbien committed May 24, 2023
1 parent 788ba1f commit 5277dc4
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 12 deletions.
3 changes: 3 additions & 0 deletions java/maven.indexer/external/binaries-list
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
# specific language governing permissions and limitations
# under the License.
C02F9C34FBC4C698BEF9D5074966545122908D06 org.apache.maven.indexer:indexer-core:7.0.2
20AE1ED178C812FAA78DC83C9BF218D047FD01D0 org.apache.maven.indexer:search-api:7.0.2
5967AEFBE9281C46155E73BBFC1E2BC4054CC92F org.apache.maven.indexer:search-backend-smo:7.0.2
2C08C7A491E9D033BB4806E0A45496E3A0667217 org.apache.lucene:lucene-core:9.6.0
A4819CA127C46A2759FDF091F41512C56C441FA7 org.apache.lucene:lucene-backward-codecs:9.6.0
B9E14451C73FA0BC8E71A2CFE12A442F37A53C69 org.apache.lucene:lucene-highlighter:9.6.0
C2F9EF9B7336981495272E99A1DF678A97966102 org.apache.lucene:lucene-queryparser:9.6.0
17A8B808BB7BF5F49FBFB9CBFF821433E2908E22 org.apache.lucene:lucene-analysis-common:9.6.0
479C1E06DB31C432330183F5CAE684163F186146 javax.annotation:javax.annotation-api:1.2
B3ADD478D4382B78EA20B1671390A858002FEB6C com.google.code.gson:gson:2.10.1
3 changes: 3 additions & 0 deletions java/maven.indexer/nbproject/project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ javac.source=11
javac.target=11
javac.compilerargs=-Xlint -Xlint:-serial
release.external/indexer-core-7.0.2.jar=modules/ext/maven/indexer-core-7.0.2.jar
release.external/search-api-7.0.2.jar=modules/ext/maven/search-api-7.0.2.jar
release.external/search-backend-smo-7.0.2.jar=modules/ext/maven/search-backend-smo-7.0.2.jar
release.external/gson-2.10.1.jar=modules/ext/maven/gson-2.10.1.jar
release.external/lucene-core-9.6.0.jar=modules/ext/maven/lucene-core-9.6.0.jar
release.external/lucene-backward-codecs-9.6.0.jar=modules/ext/maven/lucene-backward-codecs-9.6.0.jar
release.external/lucene-highlighter-9.6.0.jar=modules/ext/maven/lucene-highlighter-9.6.0.jar
Expand Down
12 changes: 12 additions & 0 deletions java/maven.indexer/nbproject/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,14 @@
<runtime-relative-path>ext/maven/indexer-core-7.0.2.jar</runtime-relative-path>
<binary-origin>external/indexer-core-7.0.2.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/maven/search-api-7.0.2.jar</runtime-relative-path>
<binary-origin>external/search-api-7.0.2.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/maven/search-backend-smo-7.0.2.jar</runtime-relative-path>
<binary-origin>external/search-backend-smo-7.0.2.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/maven/lucene-core-9.6.0.jar</runtime-relative-path>
<binary-origin>external/lucene-core-9.6.0.jar</binary-origin>
Expand All @@ -202,6 +210,10 @@
<runtime-relative-path>ext/maven/javax.annotation-api-1.2.jar</runtime-relative-path>
<binary-origin>external/javax.annotation-api-1.2.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/maven/gson-2.10.1.jar</runtime-relative-path>
<binary-origin>external/gson-2.10.1.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -65,6 +68,11 @@
import org.apache.maven.index.updater.IndexUpdateRequest;
import org.apache.maven.index.updater.IndexUpdater;
import org.apache.maven.index.updater.ResourceFetcher;
import org.apache.maven.search.SearchRequest;
import org.apache.maven.search.backend.smo.SmoSearchBackend;
import org.apache.maven.search.backend.smo.SmoSearchBackendFactory;
import org.apache.maven.search.request.FieldQuery;
import org.apache.maven.search.request.Paging;
import org.apache.maven.settings.Proxy;
import org.apache.maven.settings.Server;
import org.apache.maven.settings.crypto.DefaultSettingsDecryptionRequest;
Expand Down Expand Up @@ -167,6 +175,8 @@ public class NexusRepositoryIndexerImpl implements RepositoryIndexerImplementati
* For remote repo download and indexing tasks.
*/
private static final RequestProcessor RP_REMOTE = new RequestProcessor("maven-remote-indexing");

/**
 * Search backend obtained from {@link SmoSearchBackendFactory#createDefault()}.
 * Used by this class for class name and SHA1 queries; its repository id is
 * compared against the ids of the queried repositories to decide whether
 * the web service should be asked instead of the index.
 */
private final SmoSearchBackend smo = new SmoSearchBackendFactory().createDefault();

@Override
public boolean handlesRepository(RepositoryInfo repo) {
Expand Down Expand Up @@ -440,7 +450,7 @@ private boolean loadIndexingContext(final RepositoryInfo info) throws IOExceptio
try {
IndexSearcher.setMaxClauseCount(max);
response = searcher.searchIteratorPaged(isr, contexts);
LOGGER.log(Level.FINE, "passed on {0} clauses processing {1} with {2} hits", new Object[] {max, q, response.getTotalHitsCount()});
LOGGER.log(Level.INFO, "passed on {0} clauses processing {1} with {2} hits", new Object[] {max, q, response.getTotalHitsCount()});
return response;
} catch (IndexSearcher.TooManyClauses exc) {
LOGGER.log(Level.FINE, "TooManyClauses on {0} clauses processing {1}", new Object[] {max, q});
Expand Down Expand Up @@ -1196,12 +1206,38 @@ private ResultImplementation<NBVersionInfo> getVersions(final String groupId, fi

/**
 * Finds artifact versions which contain the given class.
 * <p>
 * If {@code repos} contains a repository whose id equals
 * {@code smo.getRepositoryId()}, that repository is queried through the
 * SMO web service (first page only, page size 64) and all remaining
 * repositories are searched the regular way; both result sets are merged.
 * Otherwise all repositories are searched the regular way.
 *
 * @param className class name to look for; a name containing a {@code '.'}
 *        is sent to the web service as fully qualified class name query,
 *        any other name as simple class name query
 * @param repos repositories to search
 * @return the search results, never {@code null}
 */
@Override
public ResultImplementation<NBVersionInfo> findVersionsByClass(final String className, List<RepositoryInfo> repos) {

    Optional<RepositoryInfo> central = repos.stream()
            .filter(repo -> repo.getId().equals(smo.getRepositoryId()))
            .findFirst();

    if (!central.isPresent()) {
        ResultImpl<NBVersionInfo> result = new ResultImpl<>((ResultImpl<NBVersionInfo> result1) -> {
            findVersionsByClass(className, result1, result1.getSkipped(), false);
        });
        return findVersionsByClass(className, result, repos, true);
    }

    // remote index contains no class data -> use web service
    List<RepositoryInfo> otherRepos = new ArrayList<>(repos);
    otherRepos.remove(central.get());

    SearchRequest request = new SearchRequest(new Paging(64),
            FieldQuery.fieldQuery(className.contains(".") ?
                    org.apache.maven.search.MAVEN.FQ_CLASS_NAME
                  : org.apache.maven.search.MAVEN.CLASS_NAME, className));

    return mergeResults(
        // otherRepos no longer contains the SMO repository, so this call takes the index-only path above
        () -> (ResultImpl<NBVersionInfo>) findVersionsByClass(className, otherRepos),
        () -> {
            try {
                return new SMOResultImpl(smo.getRepositoryId(), smo.search(request));
            } catch (IOException ex) {
                // best effort: a failed web query must not break the index search,
                // but it should not disappear without a trace either
                LOGGER.log(Level.INFO, "SMO class name query failed", ex);
                return SMOResultImpl.empty();
            }
        });
}


private ResultImplementation<NBVersionInfo> findVersionsByClass(final String className, final ResultImpl<NBVersionInfo> result, List<RepositoryInfo> repos, final boolean skipUnIndexed) {
final List<NBVersionInfo> infos = new ArrayList<>(result.getResults());
final SkippedAction skipAction = new SkippedAction(result);
Expand Down Expand Up @@ -1345,14 +1381,36 @@ private static void convertToNBGroupInfo(Collection<NBVersionInfo> artifactInfos
}
}



/**
 * Finds artifact versions by the SHA1 checksum of their file.
 * <p>
 * If {@code repos} contains a repository whose id equals
 * {@code smo.getRepositoryId()}, that repository is queried through the
 * SMO web service (first page only, page size 8) and all remaining
 * repositories are searched the regular way; both result sets are merged.
 * Otherwise all repositories are searched the regular way.
 *
 * @param sha1 checksum to look for
 * @param repos repositories to search
 * @return the search results, never {@code null}
 */
@Override
public ResultImplementation<NBVersionInfo> findBySHA1(final String sha1, List<RepositoryInfo> repos) {

    Optional<RepositoryInfo> central = repos.stream()
            .filter(repo -> repo.getId().equals(smo.getRepositoryId()))
            .findFirst();

    if (!central.isPresent()) {
        ResultImpl<NBVersionInfo> result = new ResultImpl<>((ResultImpl<NBVersionInfo> result1) -> {
            findBySHA1(sha1, result1, result1.getSkipped(), false);
        });
        return findBySHA1(sha1, result, repos, true);
    }

    // remote index contains no sha1 data -> use web service
    List<RepositoryInfo> otherRepos = new ArrayList<>(repos);
    otherRepos.remove(central.get());

    SearchRequest request = new SearchRequest(new Paging(8),
            FieldQuery.fieldQuery(org.apache.maven.search.MAVEN.SHA1, sha1));

    return mergeResults(
        // otherRepos no longer contains the SMO repository, so this call takes the index-only path above
        () -> (ResultImpl<NBVersionInfo>) findBySHA1(sha1, otherRepos),
        () -> {
            try {
                return new SMOResultImpl(smo.getRepositoryId(), smo.search(request));
            } catch (IOException ex) {
                // best effort: a failed web query must not break the index search,
                // but it should not disappear without a trace either
                LOGGER.log(Level.INFO, "SMO sha1 query failed", ex);
                return SMOResultImpl.empty();
            }
        });
}

private ResultImplementation<NBVersionInfo> findBySHA1(final String sha1, final ResultImpl<NBVersionInfo> result, List<RepositoryInfo> repos, final boolean skipUnIndexed) {
Expand Down Expand Up @@ -1699,6 +1757,24 @@ static NBVersionInfo convertToNBVersionInfo(ArtifactInfo ai) {
return nbvi;
}

/**
 * Appends the results of a second query to the results of a first query.
 * <p>
 * Both suppliers are invoked sequentially on the calling thread, the first
 * one before the second one. The object returned by the first supplier is
 * modified and returned: its result list is replaced by the concatenation
 * of both lists (first results first) and its counters are increased.
 *
 * @param <T> result element type
 * @param mergedResultsSupplier supplies the result which receives the additional entries
 * @param resultsToAddSupplier supplies the result whose entries are appended
 * @return the instance obtained from {@code mergedResultsSupplier}
 */
private static <T> ResultImpl<T> mergeResults(Supplier<ResultImpl<T>> mergedResultsSupplier, Supplier<ResultImplementation<T>> resultsToAddSupplier) {

    Instant start = Instant.now();
    ResultImpl<T> mergedResults = mergedResultsSupplier.get();
    List<T> r1 = mergedResults.getResults();
    ResultImplementation<T> resultsToAdd = resultsToAddSupplier.get();
    List<T> r2 = resultsToAdd.getResults();
    long elapsedMillis = Duration.between(start, Instant.now()).toMillis();
    // supplier variant: the message is only built if the record is actually logged
    LOGGER.log(Level.INFO, () -> "response time: " + elapsedMillis);

    List<T> merged = new ArrayList<>(r2.size() + r1.size());
    merged.addAll(r1);
    merged.addAll(r2);

    mergedResults.setResults(merged);
    mergedResults.addReturnedResultCount(r2.size());
    // The added result may report more hits than it returned (e.g. a paged query).
    // Keep that information, but never report fewer total hits than entries added.
    mergedResults.addTotalResultCount(Math.max(r2.size(), resultsToAdd.getTotalResultCount()));
    return mergedResults;
}

private static Query setBooleanRewrite (final Query q) {
if (q instanceof MultiTermQuery) {
((MultiTermQuery)q).setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.modules.maven.indexer;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.maven.search.backend.smo.SmoSearchResponse;
import org.netbeans.modules.maven.indexer.api.NBVersionInfo;
import org.netbeans.modules.maven.indexer.spi.ResultImplementation;

/**
 * Wraps search-maven-org query results.
 * <p>
 * The records of the response page are converted to {@link NBVersionInfo}
 * instances on the first call of {@link #getResults()} and cached afterwards.
 * The cache is a plain field without synchronization.
 *
 * @author mbien
 */
final class SMOResultImpl implements ResultImplementation<NBVersionInfo> {

    /** Converted page content, {@code null} until {@link #getResults()} is called. */
    private List<NBVersionInfo> list = null;
    private final String repoId;
    private final SmoSearchResponse response;

    /**
     * @param repoId repository id assigned to every created {@link NBVersionInfo}
     * @param response the web service response to wrap
     */
    public SMOResultImpl(String repoId, SmoSearchResponse response) {
        this.repoId = repoId;
        this.response = response;
    }

    /**
     * Returns the entries of the wrapped response page.
     *
     * @return the converted records; the same list instance on every call
     */
    @Override
    public List<NBVersionInfo> getResults() {
        if (list == null) {
            list = response.getPage().stream()
                    .map(rec -> new NBVersionInfo(
                            repoId,
                            rec.getValue(org.apache.maven.search.MAVEN.GROUP_ID),
                            rec.getValue(org.apache.maven.search.MAVEN.ARTIFACT_ID),
                            rec.getValue(org.apache.maven.search.MAVEN.VERSION),
                            rec.getValue(org.apache.maven.search.MAVEN.PACKAGING), // todo, type is used in the UI as packaging??
                            rec.getValue(org.apache.maven.search.MAVEN.PACKAGING),
                            null, // NOTE(review): presumably project name - confirm against the NBVersionInfo constructor
                            null, // NOTE(review): presumably description - confirm against the NBVersionInfo constructor
                            rec.getValue(org.apache.maven.search.MAVEN.CLASSIFIER)))
                    .collect(Collectors.toList());
        }
        return list;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isPartial() {
        return false;
    }

    /**
     * Does nothing: this result is never partial, so there is nothing to
     * wait for. Matches the behavior of the result returned by {@link #empty()}.
     */
    @Override
    public void waitForSkipped() {
        // intentionally empty, see isPartial()
    }

    /**
     * @return the total hit count reported by the wrapped response
     */
    @Override
    public int getTotalResultCount() {
        return response.getTotalHits();
    }

    /**
     * @return the current hit count reported by the wrapped response
     */
    @Override
    public int getReturnedResultCount() {
        return response.getCurrentHits();
    }

    /**
     * Creates a result without entries, e.g. as replacement for a failed query.
     *
     * @return a non-partial result with an empty list and both counts being 0
     */
    public static ResultImplementation<NBVersionInfo> empty() {
        return new ResultImplementation<NBVersionInfo>() {
            @Override
            public boolean isPartial() {
                return false;
            }
            @Override
            public List<NBVersionInfo> getResults() {
                return Collections.emptyList();
            }
            @Override
            public int getTotalResultCount() {
                return 0;
            }
            @Override
            public int getReturnedResultCount() {
                return 0;
            }
            @Override
            public void waitForSkipped() {}
        };
    }

}

0 comments on commit 5277dc4

Please sign in to comment.