Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exponential memory improvement by re-using NameState across multiple patterns #88

Merged
merged 18 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/software/amazon/event/ruler/ByteMachine.java
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,7 @@ private NameState findMatchPattern(final InputCharacter[] characters, final Patt

SingleByteTransition nextByteState = eachTrans.getNextByteState();
if (nextByteState == null) {
return null;
continue;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated bug fix

}

// We are interested in the first state that hasn't simply led back to trans
Expand Down
74 changes: 56 additions & 18 deletions src/main/software/amazon/event/ruler/GenericMachine.java
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,7 @@ public void addPatternRule(final T name, final Map<String, List<Patterns>> namev
final ArrayList<String> keys = new ArrayList<>(namePatterns.keySet());
Collections.sort(keys);
synchronized(this) {
final List<String> addedKeys = new ArrayList<>();
addStep(getStartState(), keys, 0, namePatterns, name, addedKeys, subRuleContextGenerator.generate());
addIntoUsedFields(addedKeys);
addStep(keys, namePatterns, name);
}
}

Expand Down Expand Up @@ -477,13 +475,45 @@ public void deleteRule(final T name, final InputStream json) throws IOException
}
}

private void addStep(final NameState state,
final List<String> keys,
final int keyIndex,
private void addStep(final List<String> keys,
final Map<String, List<Patterns>> patterns,
final T ruleName,
List<String> addedKeys,
final SubRuleContext context) {
final T ruleName) {
List<String> addedKeys = new ArrayList<>();
List<Set<NameState>> nameStates = new ArrayList<>();
if (addStep(getStartState(), keys, 0, patterns, ruleName, addedKeys, nameStates)) {
SubRuleContext context = subRuleContextGenerator.generate();
for (int i = 0; i < keys.size(); i++) {
boolean isTerminal = i + 1 == keys.size();
for (Patterns pattern : patterns.get(keys.get(i))) {
for (NameState nameState : nameStates.get(i)) {
nameState.addSubRule(ruleName, context.getId(), pattern, isTerminal);
}
}
}
}
addIntoUsedFields(addedKeys);
}

/**
* Add a step, meaning keys and patterns, to the provided NameState.
*
* @param state NameState to add the step to.
* @param keys All keys of the rule.
* @param keyIndex The current index for keys.
* @param patterns Map of key to patterns.
* @param ruleName Name of the rule.
* @param addedKeys Pass in an empty list - this tracks keys that have been added.
* @param nameStatesForEachKey Pass in an empty list - this tracks the NameStates accessible by each key.
* @return True if and only if the keys and patterns being added represent a new sub-rule. Specifically, there
* exists at least one key or at least one pattern for a key not present in another sub-rule of the rule.
*/
private boolean addStep(final NameState state,
final List<String> keys,
final int keyIndex,
final Map<String, List<Patterns>> patterns,
final T ruleName,
List<String> addedKeys,
final List<Set<NameState>> nameStatesForEachKey) {

final String key = keys.get(keyIndex);
ByteMachine byteMachine = state.getTransitionOn(key);
Expand Down Expand Up @@ -516,19 +546,27 @@ private void addStep(final NameState state,
}
nameStates.add(lastNextState);
}

nameStatesForEachKey.add(nameStates);

// Determine if we are adding a new rule or not. If we are not yet at the terminal key, go deeper recursively.
// If we are at the terminal key, check if the NameState already contains a terminal sub-rule for the provided
// rule name and patterns, and propagate this result up the recursion stack.
boolean isRuleNew = false;
final int nextKeyIndex = keyIndex + 1;
boolean isTerminal = nextKeyIndex == keys.size();
for (NameState nameState : nameStates) {
// if this was the last step, then reaching the last state means the rule matched.
final int nextKeyIndex = keyIndex + 1;
boolean isTerminal = nextKeyIndex == keys.size();
for (Patterns pattern : patterns.get(key)) {
nameState.addSubRule(ruleName, context.getId(), pattern, isTerminal);
}
if (nextKeyIndex != keys.size()) {
addStep(nameState, keys, nextKeyIndex, patterns, ruleName, addedKeys, context);
if (!isTerminal) {
isRuleNew = addStep(nameState, keys, nextKeyIndex, patterns, ruleName, addedKeys, nameStatesForEachKey);
} else {
for (Patterns pattern : patterns.get(key)) {
if (!nameState.containsTerminalSubRule(ruleName, pattern)) {
return true;
}
}
}
}

return isRuleNew;
}

private boolean hasValuePatterns(List<Patterns> patterns) {
Expand Down
21 changes: 21 additions & 0 deletions src/main/software/amazon/event/ruler/NameState.java
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,27 @@ void addSubRule(final Object rule, final double subRuleId, final Patterns patter
((Set) patternToSubRules.get(pattern)).add(setElement);
}

/**
* Determines whether this NameState contains at least one terminal sub-rule accessible via the provided rule and
* pattern. Note that this method iterates through all terminal sub-rules for the provided pattern.
*
* @param rule The rule, which may have multiple sub-rules.
* @param pattern The pattern used by the rule to transition to this NameState.
* @return True indicates that this NameState contains at least one matching terminal sub-rule.
*/
boolean containsTerminalSubRule(final Object rule, final Patterns pattern) {
Set<SubRule> subRules = patternToTerminalSubRules.get(pattern);
if (subRules == null) {
return false;
}
for (SubRule subRule : subRules) {
if (subRule.getRule().equals(rule)) {
return true;
}
}
return false;
}

void addTransition(final String key, final ByteMachine to) {
valueTransitions.put(key, to);
}
Expand Down
71 changes: 71 additions & 0 deletions src/test/software/amazon/event/ruler/MachineTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2059,6 +2059,77 @@ public void testApproxSizeForSimplestPossibleMachine() throws Exception {
assertEquals(64, machine.approximateObjectCount());
}

@Test
public void testApproxSizeForDuplicatedRules() throws Exception {
String rule1 = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6] }";

Machine machine = new Machine();
machine.addRule("r1", rule1);
assertEquals(80, machine.approximateObjectCount());

// Adding the same rule multiple times should not increase object count
for (int i = 0; i < 100; i++) {
machine.addRule("r1", rule1);
}
assertEquals(80, machine.approximateObjectCount());
}

@Test
public void testApproxSizeForAddingDuplicateRuleExceptTerminalKeyIsSubset() throws Exception {
String rule1a = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6] }";
String rule1b = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5] }";

Machine machine = new Machine();
machine.addRule("r1", rule1a);
assertEquals(80, machine.approximateObjectCount());

// Adding rule with terminal key having subset of values will be treated as same rule and thus increase size
machine.addRule("r1", rule1b);
assertEquals(80, machine.approximateObjectCount());
}

@Test
public void testApproxSizeForAddingDuplicateRuleExceptNonTerminalKeyIsSubset() throws Exception {
String rule1a = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6] }";
String rule1b = "{ \"a\" : [ 1, 2 ], \"b\" : [4, 5, 6] }";

Machine machine = new Machine();
machine.addRule("r1", rule1a);
assertEquals(80, machine.approximateObjectCount());

// Adding rule with non-terminal key having subset of values will be treated as same rule and not affect count
machine.addRule("r1", rule1b);
assertEquals(80, machine.approximateObjectCount());
}

@Test
public void testApproxSizeForAddingDuplicateRuleExceptTerminalKeyIsSuperset() throws Exception {
String rule1a = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6] }";
String rule1b = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6, 7] }";

Machine machine = new Machine();
machine.addRule("r1", rule1a);
assertEquals(80, machine.approximateObjectCount());

// Adding rule with terminal key having superset of values will be treated as new rule and increase count
machine.addRule("r1", rule1b);
assertEquals(91, machine.approximateObjectCount());
}

@Test
public void testApproxSizeForAddingDuplicateRuleExceptNonTerminalKeyIsSuperset() throws Exception {
String rule1a = "{ \"a\" : [ 1, 2, 3 ], \"b\" : [4, 5, 6] }";
String rule1b = "{ \"a\" : [ 1, 2, 3, 7 ], \"b\" : [4, 5, 6] }";

Machine machine = new Machine();
machine.addRule("r1", rule1a);
assertEquals(80, machine.approximateObjectCount());

// Adding rule with non-terminal key having superset of values will be treated as new rule and increase count
machine.addRule("r1", rule1b);
assertEquals(87, machine.approximateObjectCount());
}

@Test
public void testSuffixChineseMatch() throws Exception {
Machine m = new Machine();
Expand Down
14 changes: 14 additions & 0 deletions src/test/software/amazon/event/ruler/NameStateTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,18 @@ public void testGetSubRuleIdsForTerminal() {
Set<Double> expectedSubRuleIds = new HashSet<>(Arrays.asList(1.0, 3.0, 5.0));
assertEquals(expectedSubRuleIds, nameState.getSubRuleIds(Patterns.exactMatch("a"), true));
}

@Test
public void testContainsTerminalSubRule() {
NameState nameState = new NameState();
nameState.addSubRule("rule1", 1.0, Patterns.exactMatch("a"), true);
nameState.addSubRule("rule2", 2.0, Patterns.exactMatch("a"), false);
nameState.addSubRule("rule1", 2.0, Patterns.exactMatch("b"), false);

assertTrue(nameState.containsTerminalSubRule("rule1", Patterns.exactMatch("a")));
assertFalse(nameState.containsTerminalSubRule("rule2", Patterns.exactMatch("a")));
assertFalse(nameState.containsTerminalSubRule("rule1", Patterns.exactMatch("b")));
assertFalse(nameState.containsTerminalSubRule("rule3", Patterns.exactMatch("a")));
assertFalse(nameState.containsTerminalSubRule("rule1", Patterns.exactMatch("c")));
}
}