Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exponential memory improvement by re-using NameState across multiple patterns #88

Merged
merged 18 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 100 additions & 13 deletions src/main/software/amazon/event/ruler/ACFinder.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package software.amazon.event.ruler;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
* Matches rules to events as does Finder, but in an array-consistent fashion, thus the AC prefix on the class name.
Expand Down Expand Up @@ -29,14 +31,19 @@ private static List<Object> find(final ACTask task) {
if (startState == null) {
return Collections.emptyList();
}
moveFrom(null, new NameStateWithPattern(startState, null), 0, task, new ArrayMembership());

moveFrom(startState, 0, task, new ArrayMembership());
// each iteration removes a Step and adds zero or more new ones
while (task.stepsRemain()) {
tryStep(task);
}

return task.getMatchedRules();
}

// remove a step from the work queue and see if there's a transition
private static void tryStep(final ACTask task, final ACStep step) {
private static void tryStep(final ACTask task) {
final ACStep step = task.nextStep();
final Field field = task.event.fields.get(step.fieldIndex);

// if we can step from where we are to the new field without violating array consistency
Expand All @@ -49,33 +56,68 @@ private static void tryStep(final ACTask task, final ACStep step) {

// loop through the value pattern matches, if any
final int nextFieldIndex = step.fieldIndex + 1;
for (NameState nextNameState : valueMatcher.transitionOn(field.val)) {
for (NameStateWithPattern nextNameStateWithPattern : valueMatcher.transitionOn(field.val)) {

// we have moved to a new NameState
// this NameState might imply a rule match
task.collectRules(nextNameState);
task.collectRules(step.candidateSubRuleIds, nextNameStateWithPattern);

// set up for attempting to move on from the new state
moveFrom(nextNameState, nextFieldIndex, task, newMembership);
moveFrom(step, nextNameStateWithPattern, nextFieldIndex, task, newMembership);
}
}
}
}

private static void tryMustNotExistMatch(final NameState nameState, final ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
private static void tryMustNotExistMatch(final ACStep step, final NameState nameState, final ACTask task,
int nextKeyIndex, final ArrayMembership arrayMembership) {
if (!nameState.hasKeyTransitions()) {
return;
}

for (NameState nextNameState : nameState.getNameTransitions(task.event, arrayMembership)) {
if (nextNameState != null) {
addNameState(nextNameState, task, nextKeyIndex, arrayMembership);
addNameState(step, new NameStateWithPattern(nextNameState, Patterns.absencePatterns()), task,
nextKeyIndex, arrayMembership);
}
}
}

// Move from a state. Give all the remaining event fields a chance to transition from it
private static void moveFrom(final NameState fromState, int fieldIndex, final ACTask task, final ArrayMembership arrayMembership) {
private static void moveFrom(final ACStep step, final NameStateWithPattern fromStateWithPattern, int fieldIndex,
final ACTask task, final ArrayMembership arrayMembership) {
// There is no step for initial move. Use empty candidate sub-rule set when adding steps. Do not proceed with
// rest of function as it is only relevant for subsequent moves.
if (step == null) {
tryMustNotExistMatch(null, fromStateWithPattern.getNameState(), task, fieldIndex, arrayMembership);

while (fieldIndex < task.fieldCount) {
// Calculate candidate sub-rule IDs for next step. If we have a pattern, use it to calculate candidates.
// Otherwise, use empty candidate set, meaning we are on first step and everything is still a candidate.
Set<Double> candidateSubRuleIdsForNextStep;
if (fromStateWithPattern.getPattern() != null) {
candidateSubRuleIdsForNextStep = calculateCandidateSubRuleIdsForNextStep(null, fromStateWithPattern);
// If there are no more candidate sub-rule IDs, there is no need to proceed further.
if (candidateSubRuleIdsForNextStep == null || candidateSubRuleIdsForNextStep.isEmpty()) {
break;
}
} else {
candidateSubRuleIdsForNextStep = new HashSet<>();
}

task.addStep(fieldIndex++, fromStateWithPattern.getNameState(), candidateSubRuleIdsForNextStep,
arrayMembership);
}
return;
}

Set<Double> candidateSubRuleIdsForNextStep = calculateCandidateSubRuleIdsForNextStep(step.candidateSubRuleIds,
fromStateWithPattern);
// If there are no more candidate sub-rules, there is no need to proceed further.
if (candidateSubRuleIdsForNextStep == null || candidateSubRuleIdsForNextStep.isEmpty()) {
return;
}

/*
* The Name Matchers look for an [ { exists: false } ] match. They
* will match if a particular key is not present
Expand All @@ -91,17 +133,62 @@ private static void moveFrom(final NameState fromState, int fieldIndex, final AC
* the final state can still be evaluated to true if the particular event
* does not have the key configured for [ { exists: false } ].
*/
tryMustNotExistMatch(fromState, task, fieldIndex, arrayMembership);
tryMustNotExistMatch(new ACStep(fieldIndex, fromStateWithPattern.getNameState(), candidateSubRuleIdsForNextStep,
arrayMembership), fromStateWithPattern.getNameState(), task, fieldIndex, arrayMembership);

// Add more steps using our new set of candidate sub-rules.
while (fieldIndex < task.fieldCount) {
tryStep(task, new ACStep(fieldIndex++, fromState, arrayMembership));
task.addStep(fieldIndex++, fromStateWithPattern.getNameState(), candidateSubRuleIdsForNextStep,
arrayMembership);
}
}

private static void addNameState(NameState nameState, ACTask task, int nextKeyIndex, final ArrayMembership arrayMembership) {
/**
* Calculate the candidate sub-rule IDs for the next step.
*
* @param currentCandidateSubRuleIds The candidate sub-rule IDs for the current step. Use null to indicate that we
* are on first step and so there are not yet any candidate sub-rules.
* @param fromStateWithPattern The NameStateWithPattern for the NameState we are transitioning from.
* @return The set of candidate sub-rule IDs for the next step. Null means there are no candidates and thus, there
* is no point to evaluating subsequent steps.
*/
private static Set<Double> calculateCandidateSubRuleIdsForNextStep(
final Set<Double> currentCandidateSubRuleIds, final NameStateWithPattern fromStateWithPattern) {
// Start out with the candidate sub-rule IDs from the current step.
Set<Double> candidateSubRuleIdsForNextStep = new HashSet<>();
if (currentCandidateSubRuleIds != null) {
candidateSubRuleIdsForNextStep.addAll(currentCandidateSubRuleIds);
}

// These are all the sub-rule IDs that use the matched pattern to transition to the next NameState. Note that
// they are not all candidates as they may have required different values for previously evaluated fields.
Set<Double> subRuleIds = fromStateWithPattern.getNameState().getNonTerminalSubRuleIdsForPattern(
fromStateWithPattern.getPattern());

// If no sub-rules used the matched pattern to transition to the next NameState, then there are no matches to be
// found by going further.
if (subRuleIds == null) {
return null;
}

// If there are no candidate sub-rules, this means we are on the first NameState and must initialize the
// candidate sub-rules to those that used the matched pattern to transition to the next NameState. If there are
// already candidates, then retain only those that used the matched pattern to transition to the next NameState.
if (candidateSubRuleIdsForNextStep.isEmpty()) {
candidateSubRuleIdsForNextStep.addAll(subRuleIds);
} else {
candidateSubRuleIdsForNextStep.retainAll(subRuleIds);
}

return candidateSubRuleIdsForNextStep;
}

private static void addNameState(ACStep step, NameStateWithPattern nameStateWithPattern, ACTask task,
int nextKeyIndex, final ArrayMembership arrayMembership) {
// one of the matches might imply a rule match
task.collectRules(nameState);
task.collectRules(step == null ? new HashSet<>() : step.candidateSubRuleIds, nameStateWithPattern);

moveFrom(nameState, nextKeyIndex, task, arrayMembership);
moveFrom(step, nameStateWithPattern, nextKeyIndex, task, arrayMembership);
}
}

7 changes: 6 additions & 1 deletion src/main/software/amazon/event/ruler/ACStep.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
package software.amazon.event.ruler;

import java.util.Set;

/**
* Represents a suggestion of a state/token combo from which there might be a transition, in an array-consistent fashion.
*/
class ACStep {
final int fieldIndex;
final NameState nameState;
final Set<Double> candidateSubRuleIds;
final ArrayMembership membershipSoFar;

ACStep(final int fieldIndex, final NameState nameState, final ArrayMembership arrayMembership) {
ACStep(final int fieldIndex, final NameState nameState, final Set<Double> candidateSubRuleIds,
final ArrayMembership arrayMembership) {
this.fieldIndex = fieldIndex;
this.nameState = nameState;
this.candidateSubRuleIds = candidateSubRuleIds;
this.membershipSoFar = arrayMembership;
}
}
50 changes: 45 additions & 5 deletions src/main/software/amazon/event/ruler/ACTask.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package software.amazon.event.ruler;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Represents the state of an Array-Consistent rule-finding project.
Expand All @@ -13,8 +17,11 @@ class ACTask {
public final Event event;
final int fieldCount;

// the rules, if we find any
private final HashSet<Object> rules = new HashSet<>();
// the sub-rules that matched the event, if we find any
private final Set<NameState.SubRule> matchingSubRules = new HashSet<>();

// Steps queued up for processing
private final Queue<ACStep> stepQueue = new ArrayDeque<>();

// the state machine
private final GenericMachine<?> machine;
Expand All @@ -29,11 +36,44 @@ NameState startState() {
return machine.getStartState();
}

ACStep nextStep() {
return stepQueue.remove();
}

/*
* Add a step to the queue for later consideration
*/
void addStep(final int fieldIndex, final NameState nameState, final Set<Double> candidateSubRuleIds,
final ArrayMembership membershipSoFar) {
stepQueue.add(new ACStep(fieldIndex, nameState, candidateSubRuleIds, membershipSoFar));
}

boolean stepsRemain() {
return !stepQueue.isEmpty();
}

List<Object> getMatchedRules() {
return new ArrayList<>(rules);
return new ArrayList<>(matchingSubRules.stream()
.map(r -> r.getRule())
.collect(Collectors.toSet()));
}

void collectRules(final NameState nameState) {
rules.addAll(nameState.getRules());
void collectRules(final Set<Double> candidateSubRuleIds, final NameStateWithPattern nameStateWithPattern) {
Set<NameState.SubRule> terminalSubRules = nameStateWithPattern.getNameState().getTerminalSubRulesForPattern(
nameStateWithPattern.getPattern());
if (terminalSubRules == null) {
return;
}

// If no candidates, that means we're on the first step, so all sub-rules are candidates.
if (candidateSubRuleIds.isEmpty()) {
matchingSubRules.addAll(terminalSubRules);
} else {
for (NameState.SubRule subRule : terminalSubRules) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one matching path, the perf would bound to number of subrules and candidate sub rules, but I see your point, there shall have small number of sub rules in reality, shall be no visible perf diff unless we create dedicated benchmark with large quantity of sub rules.
btw, change candidate as double set is a good enhancement

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Yeah, this code is basically the same code as from Task. At some point, we need to reduce the duplicated code from the AC vs non-AC code paths.

if (candidateSubRuleIds.contains(subRule.getId())) {
matchingSubRules.add(subRule);
}
}
}
}
}
Loading