Skip to content

Commit

Permalink
#176 Re-adding capitalization property. Fixing function spelling. Setting confidence values. Improving ngram max.
Browse files Browse the repository at this point in the history
  • Loading branch information
jzonthemtn committed Dec 23, 2024
1 parent 57a2b46 commit 649d402
Show file tree
Hide file tree
Showing 14 changed files with 120 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -798,12 +798,13 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
if(!terms.isEmpty()) {

final String classification = customDictionary.getClassification();
final boolean capitalized = customDictionary.isCapitalized();
LOGGER.info("Custom dictionary contains {} terms.", terms.size());

if(!SensitivityLevel.OFF.getName().equalsIgnoreCase(customDictionary.getSensitivity())) {

final SensitivityLevel sensitivityLevel = SensitivityLevel.fromName(customDictionary.getSensitivity());
enabledFilters.add(new FuzzyDictionaryFilter(FilterType.CUSTOM_DICTIONARY, filterConfiguration, sensitivityLevel, terms));
enabledFilters.add(new FuzzyDictionaryFilter(FilterType.CUSTOM_DICTIONARY, filterConfiguration, sensitivityLevel, terms, capitalized));

} else {

Expand Down Expand Up @@ -850,7 +851,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getCity().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getCity().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.LOCATION_CITY, filter);

Expand Down Expand Up @@ -878,7 +879,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getCounty().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getCounty().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.LOCATION_COUNTY, filter);

Expand Down Expand Up @@ -906,7 +907,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getState().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getState().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.LOCATION_STATE, filter);

Expand Down Expand Up @@ -934,7 +935,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getHospital().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getHospital().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.HOSPITAL, filter);

Expand Down Expand Up @@ -962,7 +963,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getHospitalAbbreviation().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getHospitalAbbreviation().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL_ABBREVIATION, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL_ABBREVIATION, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.HOSPITAL_ABBREVIATION, filter);

Expand Down Expand Up @@ -990,7 +991,7 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getFirstName().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getFirstName().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.FIRST_NAME, filter);

Expand All @@ -1016,8 +1017,9 @@ public List<Filter> getFiltersForPolicy(final Policy policy, final Map<String, M
.build();

final SensitivityLevel sensitivityLevel = policy.getIdentifiers().getSurname().getSensitivityLevel();
final boolean capitalized = policy.getIdentifiers().getSurname().isCapitalized();

final Filter filter = new FuzzyDictionaryFilter(FilterType.SURNAME, filterConfiguration,sensitivityLevel);
final Filter filter = new FuzzyDictionaryFilter(FilterType.SURNAME, filterConfiguration,sensitivityLevel, capitalized);
enabledFilters.add(filter);
filterCache.get(policy.getName()).put(FilterType.SURNAME, filter);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ public void endToEndUsingCustomDictionary() throws Exception {
}

@Test
public void endToEndUsingCustomDictionaryFileFuzzyDictinonaryFilter() throws Exception {
public void endToEndUsingCustomDictionaryFileFuzzyDictionaryFilter() throws Exception {

final Path temp = Files.createTempDirectory("philter");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filterCitiesExactMatch() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.MEDIUM, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in Washington.", attributes);

Expand All @@ -69,7 +69,7 @@ public void filterCitiesExactMatch2() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.HIGH, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in New York.", attributes);

Expand All @@ -92,7 +92,7 @@ public void filterCitiesLow() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.LOW, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Lived in Wshington", attributes);

Expand All @@ -112,7 +112,7 @@ public void filterCitiesMedium() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.MEDIUM, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in Wshington", attributes);

Expand All @@ -133,7 +133,7 @@ public void filterCitiesHigh() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_CITY, filterConfiguration, SensitivityLevel.HIGH, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Lived in Wasinton", attributes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filterCountiesLow() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.LOW, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Lived in Fyette", attributes);

Expand All @@ -68,7 +68,7 @@ public void filterCountiesMedium() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.MEDIUM, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "He lived in Fyette", attributes);

Expand All @@ -90,7 +90,7 @@ public void filterCountiesHigh() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_COUNTY, filterConfiguration, SensitivityLevel.HIGH, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in Fyette", attributes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filterLow() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"John", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -66,7 +66,7 @@ public void filterMedium1() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.MEDIUM, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Michel had eye cancer", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -84,7 +84,7 @@ public void filterMedium2() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Jennifer had eye cancer", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -102,7 +102,7 @@ public void filterHigh() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.HIGH, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Sandra in Washington", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -120,7 +120,7 @@ public void filter1() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.MEDIUM, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Melissa", attributes);

Expand All @@ -139,7 +139,7 @@ public void filter2() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"thomas", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -157,7 +157,7 @@ public void filter3() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"dat", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -175,7 +175,7 @@ public void filter4() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"joie", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -193,7 +193,7 @@ public void filter5() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"John", attributes);
showSpans(filterResult.getSpans());
Expand All @@ -211,7 +211,7 @@ public void filter6() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.FIRST_NAME, filterConfiguration, SensitivityLevel.LOW, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Smith,Melissa A,MD", attributes);
showSpans(filterResult.getSpans());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filter1() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL_ABBREVIATION, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL_ABBREVIATION, filterConfiguration, SensitivityLevel.HIGH, true);

final FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Went to WMC", attributes);
showSpans(filterResult.getSpans());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filter1() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.HOSPITAL, filterConfiguration, SensitivityLevel.LOW, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Wyoming Medical Center", attributes);
Assertions.assertEquals(1, filterResult.getSpans().size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void filterStatesLow() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.LOW);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.LOW, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE,"Lived in Washington", attributes);
Assertions.assertEquals(1, filterResult.getSpans().size());
Expand All @@ -66,7 +66,7 @@ public void filterStatesMedium() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.MEDIUM);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.MEDIUM, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in Wshington", attributes);
Assertions.assertEquals(1, filterResult.getSpans().size());
Expand All @@ -83,7 +83,7 @@ public void filterStatesHigh() throws Exception {
.withWindowSize(windowSize)
.build();

final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.HIGH);
final FuzzyDictionaryFilter filter = new FuzzyDictionaryFilter(FilterType.LOCATION_STATE, filterConfiguration, SensitivityLevel.HIGH, true);

FilterResult filterResult = filter.filter(getPolicy(), "context", "documentid", PIECE, "Lived in Wasinton", attributes);
Assertions.assertEquals(0, filterResult.getSpans().size());
Expand Down
Loading

0 comments on commit 649d402

Please sign in to comment.