From f203f1c6679f84b87b3a57ee5279dac56f587193 Mon Sep 17 00:00:00 2001 From: Drew Farris Date: Tue, 19 Sep 2023 15:45:10 +0000 Subject: [PATCH] Completed SSDeepSimilarityQueryLogic configuration implementation --- .../SSDeepSimilarityQueryConfiguration.java | 10 ------- .../tables/SSDeepSimilarityQueryLogic.java | 30 +++++++++++++++---- .../SSDeepSimilarityQueryTransformer.java | 11 +++---- .../java/datawave/query/SSDeepQueryTest.java | 10 +++---- .../SSDeepSimilarityQueryTransformerTest.java | 13 ++++---- .../query/SSDeepQueryLogicFactory.xml | 8 +---- 6 files changed, 43 insertions(+), 39 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/config/SSDeepSimilarityQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/SSDeepSimilarityQueryConfiguration.java index 8c0f69c48ae..8493e7123ac 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/SSDeepSimilarityQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/SSDeepSimilarityQueryConfiguration.java @@ -41,8 +41,6 @@ public class SSDeepSimilarityQueryConfiguration extends GenericQueryConfiguratio public SSDeepSimilarityQueryConfiguration() { super(); query = new QueryImpl(); - bucketEncoder = new IntegerEncoding(bucketEncodingBase, bucketEncodingLength); - chunkSizeEncoder = new ChunkSizeEncoding(); } public SSDeepSimilarityQueryConfiguration(BaseQueryLogic configuredLogic) { @@ -116,12 +114,4 @@ public int getBucketEncodingLength() { public void setBucketEncodingLength(int bucketEncodingLength) { this.bucketEncodingLength = bucketEncodingLength; } - - public IntegerEncoding getBucketEncoder() { - return bucketEncoder; - } - - public ChunkSizeEncoding getChunkSizeEncoder() { - return chunkSizeEncoder; - } } diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/SSDeepSimilarityQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/SSDeepSimilarityQueryLogic.java index 
0902709ddd1..c6cefee8d12 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/SSDeepSimilarityQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/SSDeepSimilarityQueryLogic.java @@ -120,12 +120,13 @@ public void setupRanges(Query settings, SSDeepSimilarityQueryConfiguration confi final Multimap queryMap = nGramEngine.preprocessQueries(queries); final Set ranges = new TreeSet<>(); - final ChunkSizeEncoding chunkSizeEncoding = config.getChunkSizeEncoder(); - final IntegerEncoding bucketEncoder = config.getBucketEncoder(); + final IntegerEncoding bucketEncoder = new IntegerEncoding(config.getBucketEncodingBase(), config.getBucketEncodingLength()); + final ChunkSizeEncoding chunkSizeEncoder = new ChunkSizeEncoding(); + final int indexBuckets = config.getIndexBuckets(); for (NGramTuple ct : queryMap.keys()) { - final String sizeAndChunk = chunkSizeEncoding.encode(ct.getChunkSize()) + ct.getChunk(); + final String sizeAndChunk = chunkSizeEncoder.encode(ct.getChunkSize()) + ct.getChunk(); for (int i = 0; i < indexBuckets; i++) { final String bucketedSizeAndChunk = bucketEncoder.encode(i) + sizeAndChunk; ranges.add(Range.exact(new Text(bucketedSizeAndChunk))); @@ -172,8 +173,7 @@ public AccumuloConnectionFactory.Priority getConnectionPriority() { @Override public QueryLogicTransformer getTransformer(Query settings) { final SSDeepSimilarityQueryConfiguration config = getConfig(); - return new SSDeepSimilarityQueryTransformer(settings, config.getQueryMap(), config.getBucketEncoder(), config.getChunkSizeEncoder(), - this.markingFunctions, this.responseObjectFactory); + return new SSDeepSimilarityQueryTransformer(settings, config, this.markingFunctions, this.responseObjectFactory); } @Override @@ -190,4 +190,24 @@ public Set getRequiredQueryParameters() { public Set getExampleQueries() { return Collections.emptySet(); } + + public void setIndexBuckets(int indexBuckets) { + getConfig().setIndexBuckets(indexBuckets); + } + + public 
void setQueryThreads(int queryThreads) { + getConfig().setQueryThreads(queryThreads); + } + + public void setMaxRepeatedCharacters(int maxRepeatedCharacters) { + getConfig().setMaxRepeatedCharacters(maxRepeatedCharacters); + } + + public void setBucketEncodingBase(int bucketEncodingBase) { + getConfig().setBucketEncodingBase(bucketEncodingBase); + } + + public void setBucketEncodingLength(int bucketEncodingLength) { + getConfig().setBucketEncodingLength(bucketEncodingLength); + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java b/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java index 54a214a2483..35a34381bfa 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/SSDeepSimilarityQueryTransformer.java @@ -16,6 +16,7 @@ import com.google.common.collect.TreeMultimap; import datawave.marking.MarkingFunctions; +import datawave.query.config.SSDeepSimilarityQueryConfiguration; import datawave.query.util.ssdeep.ChunkSizeEncoding; import datawave.query.util.ssdeep.IntegerEncoding; import datawave.query.util.ssdeep.NGramScoreTuple; @@ -62,15 +63,15 @@ public class SSDeepSimilarityQueryTransformer extends BaseQueryLogicTransformer< /** Tracks which ssdeep hashes each of the ngrams originated from */ final Multimap queryMap; - public SSDeepSimilarityQueryTransformer(Query query, Multimap queryMap, IntegerEncoding bucketEncoder, - ChunkSizeEncoding chunkSizeEncoding, MarkingFunctions markingFunctions, ResponseObjectFactory responseObjectFactory) { + public SSDeepSimilarityQueryTransformer(Query query, SSDeepSimilarityQueryConfiguration config, MarkingFunctions markingFunctions, + ResponseObjectFactory responseObjectFactory) { super(markingFunctions); this.auths = new Authorizations(query.getQueryAuthorizations().split(",")); - this.queryMap 
= queryMap; + this.queryMap = config.getQueryMap(); this.responseObjectFactory = responseObjectFactory; - this.bucketEncoder = bucketEncoder; - this.chunkSizeEncoding = chunkSizeEncoding; + this.bucketEncoder = new IntegerEncoding(config.getBucketEncodingBase(), config.getBucketEncodingLength()); + this.chunkSizeEncoding = new ChunkSizeEncoding(); this.chunkStart = bucketEncoder.getLength(); this.chunkEnd = chunkStart + chunkSizeEncoding.getLength(); diff --git a/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java index 75028011cfb..26874ff30a8 100644 --- a/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/SSDeepQueryTest.java @@ -11,11 +11,9 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; -import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.TableOperations; -import org.apache.accumulo.core.client.security.tokens.PasswordToken; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Value; @@ -116,17 +114,17 @@ public static void loadData() throws Exception { InMemoryInstance i = new InMemoryInstance("ssdeepTestInstance"); accumuloClient = new InMemoryAccumuloClient("root", i); - /** create the table */ + /* create the table */ TableOperations tops = accumuloClient.tableOperations(); if (tops.exists(tableName)) { tops.delete(tableName); } tops.create(tableName); - /** add ssdeep data to the table */ + /* add ssdeep data to the table */ indexSSDeepTestData(accumuloClient); - /** dump the table */ + /* dump the table */ logSSDeepTestData(tableName); } @@ -149,7 
+147,7 @@ public void setUpQuery() { this.logic.setResponseObjectFactory(new DefaultResponseObjectFactory()); SubjectIssuerDNPair dn = SubjectIssuerDNPair.of("userDn", "issuerDn"); - DatawaveUser user = new DatawaveUser(dn, DatawaveUser.UserType.USER, Sets.newHashSet(this.auths.toString().split(",")), null, null, -1L); + DatawaveUser user = new DatawaveUser(dn, DatawaveUser.UserType.USER, Sets.newHashSet(auths.toString().split(",")), null, null, -1L); this.principal = new DatawavePrincipal(Collections.singleton(user)); } diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java index 3b716c6724d..4cf1042f585 100644 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/SSDeepSimilarityQueryTransformerTest.java @@ -21,6 +21,7 @@ import com.google.common.collect.TreeMultimap; import datawave.marking.MarkingFunctions; +import datawave.query.config.SSDeepSimilarityQueryConfiguration; import datawave.query.tables.SSDeepSimilarityQueryLogic; import datawave.query.util.ssdeep.ChunkSizeEncoding; import datawave.query.util.ssdeep.IntegerEncoding; @@ -61,10 +62,6 @@ public void transformTest() { int bucketEncodingBase = 32; int bucketEncodingLength = 2; - IntegerEncoding bucketEncoder = new IntegerEncoding(bucketEncodingBase, bucketEncodingLength); - ; - ChunkSizeEncoding chunkSizeEncoding = new ChunkSizeEncoding(); - NGramTuple tuple = new NGramTuple(chunkSize, chunk); SSDeepHash hash = SSDeepHash.parse(ssdeepString); @@ -75,12 +72,16 @@ public void transformTest() { Value value = new Value(); AbstractMap.SimpleEntry entry = new AbstractMap.SimpleEntry<>(key, value); + SSDeepSimilarityQueryConfiguration config = SSDeepSimilarityQueryConfiguration.create(); + 
config.setBucketEncodingBase(bucketEncodingBase); + config.setBucketEncodingLength(bucketEncodingLength); + config.setQueryMap(queryMap); + basicExpects(key); PowerMock.replayAll(); - SSDeepSimilarityQueryTransformer transformer = new SSDeepSimilarityQueryTransformer(mockQuery, queryMap, bucketEncoder, chunkSizeEncoding, - mockMarkingFunctions, mockResponseFactory); + SSDeepSimilarityQueryTransformer transformer = new SSDeepSimilarityQueryTransformer(mockQuery, config, mockMarkingFunctions, mockResponseFactory); Map.Entry transformedTuple = transformer.transform(entry); List resultList = new ArrayList<>(); resultList.add(transformedTuple); diff --git a/web-services/deploy/configuration/src/main/resources/datawave/query/SSDeepQueryLogicFactory.xml b/web-services/deploy/configuration/src/main/resources/datawave/query/SSDeepQueryLogicFactory.xml index 8bd08655063..628b5bddee3 100644 --- a/web-services/deploy/configuration/src/main/resources/datawave/query/SSDeepQueryLogicFactory.xml +++ b/web-services/deploy/configuration/src/main/resources/datawave/query/SSDeepQueryLogicFactory.xml @@ -2,14 +2,8 @@ + http://www.springframework.org/schema/beans/spring-beans-4.0.xsd">