Skip to content

Commit

Permalink
Completed SSDeepSimilarityQueryLogic configuration implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
drewfarris committed Sep 19, 2023
1 parent 03e7540 commit f203f1c
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ public class SSDeepSimilarityQueryConfiguration extends GenericQueryConfiguratio
// Default constructor: creates an empty QueryImpl and initializes the ssdeep
// encoding helpers from the current encoding settings.
// NOTE(review): this is a commit-diff view — the bucketEncoder/chunkSizeEncoder
// assignments below are the lines REMOVED by this commit (the hunk header
// "-41,8 +41,6" shows two lines dropped); consumers now construct the encoders
// on demand from bucketEncodingBase/bucketEncodingLength instead.
public SSDeepSimilarityQueryConfiguration() {
super();
query = new QueryImpl();
bucketEncoder = new IntegerEncoding(bucketEncodingBase, bucketEncodingLength);
chunkSizeEncoder = new ChunkSizeEncoding();
}

public SSDeepSimilarityQueryConfiguration(BaseQueryLogic<?> configuredLogic) {
Expand Down Expand Up @@ -116,12 +114,4 @@ public int getBucketEncodingLength() {
// Sets the bucket encoding length; used together with bucketEncodingBase to
// construct an IntegerEncoding for bucketed index rows (see setupRanges).
public void setBucketEncodingLength(int bucketEncodingLength) {
this.bucketEncodingLength = bucketEncodingLength;
}

// Returns the pre-built bucket encoder.
// NOTE(review): diff view — this accessor is REMOVED by this commit; callers
// now build an IntegerEncoding from getBucketEncodingBase()/Length() directly.
public IntegerEncoding getBucketEncoder() {
return bucketEncoder;
}

// Returns the pre-built chunk-size encoder.
// NOTE(review): diff view — this accessor is REMOVED by this commit; callers
// now instantiate new ChunkSizeEncoding() themselves.
public ChunkSizeEncoding getChunkSizeEncoder() {
return chunkSizeEncoder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,13 @@ public void setupRanges(Query settings, SSDeepSimilarityQueryConfiguration confi
final Multimap<NGramTuple,SSDeepHash> queryMap = nGramEngine.preprocessQueries(queries);
final Set<Range> ranges = new TreeSet<>();

final ChunkSizeEncoding chunkSizeEncoding = config.getChunkSizeEncoder();
final IntegerEncoding bucketEncoder = config.getBucketEncoder();
final IntegerEncoding bucketEncoder = new IntegerEncoding(config.getBucketEncodingBase(), config.getBucketEncodingLength());
final ChunkSizeEncoding chunkSizeEncoder = new ChunkSizeEncoding();

final int indexBuckets = config.getIndexBuckets();

for (NGramTuple ct : queryMap.keys()) {
final String sizeAndChunk = chunkSizeEncoding.encode(ct.getChunkSize()) + ct.getChunk();
final String sizeAndChunk = chunkSizeEncoder.encode(ct.getChunkSize()) + ct.getChunk();
for (int i = 0; i < indexBuckets; i++) {
final String bucketedSizeAndChunk = bucketEncoder.encode(i) + sizeAndChunk;
ranges.add(Range.exact(new Text(bucketedSizeAndChunk)));
Expand Down Expand Up @@ -172,8 +173,7 @@ public AccumuloConnectionFactory.Priority getConnectionPriority() {
// Builds the transformer that converts scan results into query responses.
// NOTE(review): diff corruption — the two return statements below are the
// pre-change (old) and post-change (new) diff lines merged together; only the
// second (config-based) return exists after this commit. As written this span
// would not compile (unreachable statement after return).
@Override
public QueryLogicTransformer getTransformer(Query settings) {
final SSDeepSimilarityQueryConfiguration config = getConfig();
return new SSDeepSimilarityQueryTransformer(settings, config.getQueryMap(), config.getBucketEncoder(), config.getChunkSizeEncoder(),
this.markingFunctions, this.responseObjectFactory);
return new SSDeepSimilarityQueryTransformer(settings, config, this.markingFunctions, this.responseObjectFactory);
}

@Override
Expand All @@ -190,4 +190,24 @@ public Set<String> getRequiredQueryParameters() {
public Set<String> getExampleQueries() {
return Collections.emptySet();
}

// Delegates to the underlying query configuration; number of bucket
// partitions iterated when generating index ranges (see setupRanges).
public void setIndexBuckets(int indexBuckets) {
getConfig().setIndexBuckets(indexBuckets);
}

// Delegates to the underlying query configuration.
public void setQueryThreads(int queryThreads) {
getConfig().setQueryThreads(queryThreads);
}

// Delegates to the underlying query configuration.
public void setMaxRepeatedCharacters(int maxRepeatedCharacters) {
getConfig().setMaxRepeatedCharacters(maxRepeatedCharacters);
}

// Delegates to the underlying query configuration; base used when
// constructing the IntegerEncoding bucket encoder.
public void setBucketEncodingBase(int bucketEncodingBase) {
getConfig().setBucketEncodingBase(bucketEncodingBase);
}

// Delegates to the underlying query configuration; length used when
// constructing the IntegerEncoding bucket encoder.
public void setBucketEncodingLength(int bucketEncodingLength) {
getConfig().setBucketEncodingLength(bucketEncodingLength);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.google.common.collect.TreeMultimap;

import datawave.marking.MarkingFunctions;
import datawave.query.config.SSDeepSimilarityQueryConfiguration;
import datawave.query.util.ssdeep.ChunkSizeEncoding;
import datawave.query.util.ssdeep.IntegerEncoding;
import datawave.query.util.ssdeep.NGramScoreTuple;
Expand Down Expand Up @@ -62,15 +63,15 @@ public class SSDeepSimilarityQueryTransformer extends BaseQueryLogicTransformer<
/** Tracks which ssdeep hashes each of the ngrams originated from */
final Multimap<NGramTuple,SSDeepHash> queryMap;

public SSDeepSimilarityQueryTransformer(Query query, Multimap<NGramTuple,SSDeepHash> queryMap, IntegerEncoding bucketEncoder,
ChunkSizeEncoding chunkSizeEncoding, MarkingFunctions markingFunctions, ResponseObjectFactory responseObjectFactory) {
public SSDeepSimilarityQueryTransformer(Query query, SSDeepSimilarityQueryConfiguration config, MarkingFunctions markingFunctions,
ResponseObjectFactory responseObjectFactory) {
super(markingFunctions);
this.auths = new Authorizations(query.getQueryAuthorizations().split(","));
this.queryMap = queryMap;
this.queryMap = config.getQueryMap();
this.responseObjectFactory = responseObjectFactory;

this.bucketEncoder = bucketEncoder;
this.chunkSizeEncoding = chunkSizeEncoding;
this.bucketEncoder = new IntegerEncoding(config.getBucketEncodingBase(), config.getBucketEncodingLength());
this.chunkSizeEncoding = new ChunkSizeEncoding();

this.chunkStart = bucketEncoder.getLength();
this.chunkEnd = chunkStart + chunkSizeEncoding.getLength();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.BatchWriterConfig;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
Expand Down Expand Up @@ -116,17 +114,17 @@ public static void loadData() throws Exception {
InMemoryInstance i = new InMemoryInstance("ssdeepTestInstance");
accumuloClient = new InMemoryAccumuloClient("root", i);

/** create the table */
/* create the table */
TableOperations tops = accumuloClient.tableOperations();
if (tops.exists(tableName)) {
tops.delete(tableName);
}
tops.create(tableName);

/** add ssdeep data to the table */
/* add ssdeep data to the table */
indexSSDeepTestData(accumuloClient);

/** dump the table */
/* dump the table */
logSSDeepTestData(tableName);
}

Expand All @@ -149,7 +147,7 @@ public void setUpQuery() {
this.logic.setResponseObjectFactory(new DefaultResponseObjectFactory());

SubjectIssuerDNPair dn = SubjectIssuerDNPair.of("userDn", "issuerDn");
DatawaveUser user = new DatawaveUser(dn, DatawaveUser.UserType.USER, Sets.newHashSet(this.auths.toString().split(",")), null, null, -1L);
DatawaveUser user = new DatawaveUser(dn, DatawaveUser.UserType.USER, Sets.newHashSet(auths.toString().split(",")), null, null, -1L);
this.principal = new DatawavePrincipal(Collections.singleton(user));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.common.collect.TreeMultimap;

import datawave.marking.MarkingFunctions;
import datawave.query.config.SSDeepSimilarityQueryConfiguration;
import datawave.query.tables.SSDeepSimilarityQueryLogic;
import datawave.query.util.ssdeep.ChunkSizeEncoding;
import datawave.query.util.ssdeep.IntegerEncoding;
Expand Down Expand Up @@ -61,10 +62,6 @@ public void transformTest() {
int bucketEncodingBase = 32;
int bucketEncodingLength = 2;

IntegerEncoding bucketEncoder = new IntegerEncoding(bucketEncodingBase, bucketEncodingLength);
;
ChunkSizeEncoding chunkSizeEncoding = new ChunkSizeEncoding();

NGramTuple tuple = new NGramTuple(chunkSize, chunk);
SSDeepHash hash = SSDeepHash.parse(ssdeepString);

Expand All @@ -75,12 +72,16 @@ public void transformTest() {
Value value = new Value();
AbstractMap.SimpleEntry<Key,Value> entry = new AbstractMap.SimpleEntry<>(key, value);

SSDeepSimilarityQueryConfiguration config = SSDeepSimilarityQueryConfiguration.create();
config.setBucketEncodingBase(bucketEncodingBase);
config.setBucketEncodingLength(bucketEncodingLength);
config.setQueryMap(queryMap);

basicExpects(key);

PowerMock.replayAll();

SSDeepSimilarityQueryTransformer transformer = new SSDeepSimilarityQueryTransformer(mockQuery, queryMap, bucketEncoder, chunkSizeEncoding,
mockMarkingFunctions, mockResponseFactory);
SSDeepSimilarityQueryTransformer transformer = new SSDeepSimilarityQueryTransformer(mockQuery, config, mockMarkingFunctions, mockResponseFactory);
Map.Entry<SSDeepHash,NGramTuple> transformedTuple = transformer.transform(entry);
List<Object> resultList = new ArrayList<>();
resultList.add(transformedTuple);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@

<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:util="http://www.springframework.org/schema/util"
xmlns:context="http://www.springframework.org/schema/context"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-4.0.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-4.0.xsd
http://www.springframework.org/schema/util
http://www.springframework.org/schema/util/spring-util-4.0.xsd">
http://www.springframework.org/schema/beans/spring-beans-4.0.xsd">

<!-- Query Logic that returns document content -->
<bean id="SSDeepSimilarityQuery" parent="baseQueryLogic" scope="prototype" class="datawave.query.tables.SSDeepSimilarityQueryLogic">
Expand Down

0 comments on commit f203f1c

Please sign in to comment.