Skip to content

Commit

Permalink
demonstrate failure caused by adding filters
Browse files Browse the repository at this point in the history
Signed-off-by: Mikhail Khludnev <mkhl@apache.org>
  • Loading branch information
mkhludnev committed Nov 19, 2024
1 parent 8e8af99 commit 5849b96
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 90 deletions.
107 changes: 53 additions & 54 deletions server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -271,29 +271,65 @@ public Query termQuery(Object value, @Nullable QueryShardContext context) {
public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
Tuple<List<InetAddress>, List<String>> ipsMasks = splitIpsAndMasks(values);
QueryUnion combiner = new QueryUnion();
convertIps(ipsMasks.v1(), combiner);
convertMasks(ipsMasks.v2(), context, combiner, combiner.getAsInt());
return combiner.get();
List<Query> combiner = new ArrayList<>();
convertIps(ipsMasks.v1(), combiner::add);
convertMasks(ipsMasks.v2(), context, combiner::add, combiner::size);
if (combiner.size()==1) {
return combiner.get(0);
}
BooleanQuery.Builder bqb = new BooleanQuery.Builder();
for (Query q : combiner) {
bqb.add(q, BooleanClause.Occur.SHOULD);
}
return new ConstantScoreQuery(bqb.build());
}

private void convertMasks(List<String> masks, QueryShardContext context, Consumer<Query> combiner, int clauses) {
private void convertMasks(List<String> masks, QueryShardContext context, Consumer<Query> combiner, IntSupplier clauses) {
if (!masks.isEmpty()) {
// attempting to avoid too many exception at best
if (masks.size() + clauses >= IndexSearcher.getMaxClauseCount() - 1 && isSearchable()) {
IpMultiRangeQueryBuilder multiRange = new IpMultiRangeQueryBuilder(name());
for (String strVal : masks) {
final Tuple<InetAddress, Integer> cidr = InetAddresses.parseCidr(strVal);
PointRangeQuery query = (PointRangeQuery) InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2());

boolean tooMany = masks.size() + clauses.getAsInt() > IndexSearcher.getMaxClauseCount();
if (tooMany) {
if (!isSearchable()) {
throw new IndexSearcher.TooManyClauses("can't search for " + masks.size() +
" IP masks in `index:false` field " + name());
}
} // let's collect multirange and bq of dv-range
// loop masks, collect ranges
IpMultiRangeQueryBuilder multiRange = null;
BooleanQuery.Builder dvQueries = null;
for (String mask : masks) {
final Tuple<InetAddress, Integer> cidr = InetAddresses.parseCidr(mask);
PointRangeQuery query = (PointRangeQuery) InetAddressPoint.newPrefixQuery(name(), cidr.v1(), cidr.v2());
if (isSearchable()) {
if (multiRange==null) {
multiRange = new IpMultiRangeQueryBuilder(name());
}
multiRange.add(query.getLowerPoint(), query.getUpperPoint());
}
combiner.accept(multiRange.build());
} else {
for (String strVal : masks) {
combiner.accept(termQuery(strVal, context));
if (hasDocValues() && !tooMany) {
if (dvQueries==null) {
dvQueries = new BooleanQuery.Builder();
}
dvQueries.add(SortedSetDocValuesField.newSlowRangeQuery(
name(),
new BytesRef(query.getLowerPoint()),
new BytesRef(query.getUpperPoint()),
true,
true
), BooleanClause.Occur.SHOULD);
}
}
// && isSearchable()
if (multiRange!=null && dvQueries!=null) {
combiner.accept(new IndexOrDocValuesQuery(multiRange.build(), dvQueries.build()));
} else {
if (multiRange!=null) {
combiner.accept(multiRange.build());
}
if (dvQueries!=null) {
combiner.accept(dvQueries.build());
}
}
}
}

Expand All @@ -309,13 +345,14 @@ private void convertIps(List<InetAddress> inetAddresses, Consumer<Query> combine
set.add(new BytesRef(InetAddressPoint.encode(address)));
}
Query dvQuery = SortedSetDocValuesField.newSlowSetQuery(name(), set);
// TODO remove closure
if (!isSearchable()) {
pointsQuery = () -> dvQuery;
} else {
Supplier<Query> wrap = pointsQuery;
pointsQuery = () -> new IndexOrDocValuesQuery(wrap.get(), dvQuery);
}
}
}// TODO just a list
combiner.accept(pointsQuery.get());
}
}
Expand Down Expand Up @@ -480,44 +517,6 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) {
}
return DocValueFormat.IP;
}

private static class QueryUnion implements Consumer<Query>, Supplier<Query>, IntSupplier {
Query first;
BooleanQuery.Builder union;
int cnt;

@Override
public void accept(Query query) {
if (first == null) {
first = query;
} else {
if (union == null) {
union = new BooleanQuery.Builder();
union.add(first, BooleanClause.Occur.SHOULD);
}
union.add(query, BooleanClause.Occur.SHOULD);
}
cnt++;
}

@Override
public Query get() {
if (union != null) {
return new ConstantScoreQuery(union.build());
} else {
if (first != null) {
return first;
} else { // no matches then
return new BooleanQuery.Builder().build();
}
}
}

@Override
public int getAsInt() {
return cnt;
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,29 @@

package org.opensearch.search;

import org.apache.lucene.search.IndexSearcher;
import org.opensearch.action.bulk.BulkRequestBuilder;
import org.opensearch.action.search.SearchPhaseExecutionException;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.network.InetAddresses;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.index.query.TermsQueryBuilder;
import org.opensearch.test.OpenSearchSingleNodeTestCase;
import org.hamcrest.MatcherAssert;

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;
import java.util.function.Consumer;

import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.hamcrest.Matchers.equalTo;

public class SearchIpFieldTermsTests extends OpenSearchSingleNodeTestCase {

public static final boolean IPv4_ONLY = true;
static String defaultIndexName = "test";

public void testMassive() throws Exception {
Expand All @@ -41,21 +40,22 @@ public void testMassive() throws Exception {

BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();

Set<String> dedupeCidrs = new HashSet<>();
int cidrs = 0;
int ips = 0;
List<String> toQuery = new ArrayList<>();
for (int i = 0; ips <= 10240 && cidrs <= 1024 && i < 1000000; i++) {
final String ip;
final int prefix;
if (IPv4_ONLY) {
for (int i = 0; ips <= 10240 && cidrs <= IndexSearcher.getMaxClauseCount()+10 && i < 1000000; i++) {
String ip;
int prefix;
boolean mask;
do{
mask = random().nextBoolean();
ip = generateRandomIPv4();
prefix = 8 + random().nextInt(24); // CIDR prefix for IPv4
} else {
ip = generateRandomIPv6();
prefix = 32 + random().nextInt(97); // CIDR prefix for IPv6
}
prefix = 24 + random().nextInt(8); // CIDR prefix for IPv4
}while(mask && !dedupeCidrs.add(getFirstThreeOctets(ip)));

bulkRequestBuilder.add(client().prepareIndex(defaultIndexName).setSource(Map.of("addr", ip)));
bulkRequestBuilder.add(client().prepareIndex(defaultIndexName).
setSource(Map.of("addr", ip, "dummy_filter", randomSubsetOf(1,"1","2","3"))));

final String termToQuery;
if (random().nextBoolean()) {
Expand All @@ -66,16 +66,32 @@ public void testMassive() throws Exception {
ips++;
}
toQuery.add(termToQuery);

if (cidrs == IndexSearcher.getMaxClauseCount()-1) {
bulkRequestBuilder.setRefreshPolicy(IMMEDIATE).get();
bulkRequestBuilder = client().prepareBulk();
long expectedMatches = (long) cidrs + ips ;
assertTermsHitCount("addr.dv", toQuery, expectedMatches);
// after this passed add dummy filter
assertTermsHitCount("addr.dv", toQuery, expectedMatches, (boolBuilder)->{
boolBuilder.filter(QueryBuilders.termsQuery("dummy_filter","1","2","3"))
.filter(QueryBuilders.termsQuery("dummy_filter","1","2","3","4"))
.filter(QueryBuilders.termsQuery("dummy_filter","1","2","3","4","5"));
});
}
if (cidrs == IndexSearcher.getMaxClauseCount()) {// this exceeds clauses precondition
bulkRequestBuilder.setRefreshPolicy(IMMEDIATE).get();
bulkRequestBuilder = client().prepareBulk();
long expectedMatches = (long) cidrs + ips ;
assertTermsHitCount("addr.dv", toQuery, expectedMatches);
}
}
int addMatches = 0;
for (int i = 0; i < atLeast(100); i++) {
final String ip;
if (IPv4_ONLY) {
ip = generateRandomIPv4();
} else {
ip = generateRandomIPv6();
}
bulkRequestBuilder.add(client().prepareIndex(defaultIndexName).setSource(Map.of("addr", ip)));
ip = generateRandomIPv4();
bulkRequestBuilder.add(client().prepareIndex(defaultIndexName).setSource(Map.of("addr", ip,
"dummy_filter", randomSubsetOf(1,"1","2","3"))));
boolean match = false;
for (String termQ : toQuery) {
boolean isCidr = termQ.contains("/");
Expand All @@ -93,15 +109,45 @@ public void testMassive() throws Exception {

bulkRequestBuilder.setRefreshPolicy(IMMEDIATE).get();
long expectedMatches = (long) cidrs + ips + addMatches;
for (String field : List.of("addr", "addr.idx", "addr.dv")) {
for (String field : List.of("addr", "addr.idx" /*"addr.dv"*/)) {
assertTermsHitCount(field, toQuery, expectedMatches);
}

try {
assertTermsHitCount("addr.dv", toQuery, expectedMatches);
fail();
}catch (SearchPhaseExecutionException tmc) {
Throwable cause = tmc.shardFailures()[0].getCause().getCause();
assertTrue (cause instanceof IndexSearcher.TooManyClauses);
assertTrue(cause.getMessage().contains("IP"));
assertTrue(cause.getMessage().contains("masks"));
}
}

public static String getFirstThreeOctets(String ipAddress) {
// Split the IP address by the dot delimiter
String[] octets = ipAddress.split("\\.");

// Take the first three octets
String[] firstThreeOctets = new String[3];
System.arraycopy(octets, 0, firstThreeOctets, 0, 3);

// Join the first three octets back together with dots
return String.join(".", firstThreeOctets);
}

private void assertTermsHitCount(String field, Collection<String> toQuery, long expectedMatches) {
assertTermsHitCount(field, toQuery, expectedMatches, (bqb)->{});
}

private void assertTermsHitCount(String field, Collection<String> toQuery, long expectedMatches, Consumer<BoolQueryBuilder> addFilter) {
TermsQueryBuilder ipTerms = QueryBuilders.termsQuery(field, new ArrayList<>(toQuery));
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
addFilter.accept(boolQueryBuilder);
SearchResponse result = client().prepareSearch(defaultIndexName)
.setQuery(QueryBuilders.boolQuery().must(ipTerms).filter(QueryBuilders.termsQuery("dummy_filter", "a", "b")))
.setQuery(boolQueryBuilder.must(ipTerms)
//.filter(QueryBuilders.termsQuery("dummy_filter", "a", "b"))
)
.get();
long hitsFound = Objects.requireNonNull(result.getHits().getTotalHits()).value;
MatcherAssert.assertThat(field, hitsFound, equalTo(expectedMatches));
Expand Down Expand Up @@ -153,18 +199,6 @@ private String generateRandomIPv4() {
);
}

// Generate a random IPv6 address
private static String generateRandomIPv6() {
StringBuilder ipv6 = new StringBuilder();
for (int i = 0; i < 8; i++) {
ipv6.append(Integer.toHexString(random().nextInt(0xFFFF + 1)));
if (i < 7) {
ipv6.append(":");
}
}
return ipv6.toString();
}

private XContentBuilder createMapping() throws IOException {
return XContentFactory.jsonBuilder()
.startObject()
Expand Down

0 comments on commit 5849b96

Please sign in to comment.