Skip to content

Commit

Permalink
Group results in issues #200
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Mar 14, 2023
1 parent be9aa3f commit 9429939
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ public class ValidatorDAO {
// Counter state exposed through this DAO's getters. Maps whose name ends in "Groupped"
// hold one inner counter map per group id (the outer String key); the maps without that
// suffix are the ungrouped counterparts.
private final Map<String, Map<ValidationError, Integer>> instanceBasedErrorCounterGroupped = new HashMap<>();
// Keyed by a small integer code; ValidatorCli counts under the keys 0, 1 and 2.
private final Map<Integer, Integer> totalRecordCounter = new HashMap<>();
private final Map<String, Map<Integer, Integer>> totalRecordCounterGroupped = new HashMap<>();
private final Map<Integer, Integer> totalInstanceCounter = new HashMap<>();
// Keyed by error id; ValidatorCli counts each distinct error id once per processed record.
private final Map<Integer, Integer> recordBasedErrorCounter = new HashMap<>();
private final Map<String, Map<Integer, Integer>> recordBasedErrorCounterGroupped = new HashMap<>();
// Keyed by error category; counted once per error instance.
private final Map<ValidationErrorCategory, Integer> categoryInstanceCounter = new EnumMap<>(ValidationErrorCategory.class);
private final Map<String, Map<ValidationErrorCategory, Integer>> categoryInstanceCounterGroupped = new HashMap<>();
private final Map<Integer, Integer> totalInstanceCounter = new HashMap<>();
// TreeMap, so iteration follows ascending key order.
// NOTE(review): presumably error id -> record ids; confirm against updateErrorCollector().
private final Map<Integer, Set<String>> errorCollector = new TreeMap<>();
// Keyed by error type; counted once per error instance.
private final Map<ValidationErrorType, Integer> typeInstanceCounter = new EnumMap<>(ValidationErrorType.class);
private final Map<ValidationErrorCategory, Integer> categoryInstanceCounter = new EnumMap<>(ValidationErrorCategory.class);
private final Map<String, Map<ValidationErrorType, Integer>> typeInstanceCounterGroupped = new HashMap<>();

public Map<ValidationErrorCategory, Integer> getCategoryRecordCounter() {
return categoryRecordCounter;
Expand Down Expand Up @@ -66,6 +69,10 @@ public Map<Integer, Integer> getRecordBasedErrorCounter() {
return recordBasedErrorCounter;
}

/**
 * Returns the per-group, record-based error counter.
 * <p>
 * The outer key is the group id and the inner key is the error id. The
 * returned map is this DAO's own mutable instance, not a copy: ValidatorCli
 * adds group entries and updates counts through it.
 *
 * @return the live map of group id to (error id to count)
 */
public Map<String, Map<Integer, Integer>> getRecordBasedErrorCounterGroupped() {
return recordBasedErrorCounterGroupped;
}

/**
 * Returns the error collector.
 * <p>
 * The returned map is this DAO's own mutable instance, not a copy.
 * NOTE(review): presumably maps an error id to the ids of the records in
 * which that error occurred; confirm against the code that fills it.
 *
 * @return the live error collector map
 */
public Map<Integer, Set<String>> getErrorCollector() {
return errorCollector;
}
Expand All @@ -74,7 +81,15 @@ public Map<ValidationErrorType, Integer> getTypeInstanceCounter() {
return typeInstanceCounter;
}

/**
 * Returns the per-group instance counter of error types.
 * <p>
 * The outer key is the group id and the inner key is the error type. The
 * returned map is this DAO's own mutable instance, not a copy: ValidatorCli
 * adds group entries and updates counts through it.
 *
 * @return the live map of group id to (error type to count)
 */
public Map<String, Map<ValidationErrorType, Integer>> getTypeInstanceCounterGroupped() {
return typeInstanceCounterGroupped;
}

/**
 * Returns the ungrouped instance counter of error categories.
 * <p>
 * The returned map is this DAO's own mutable instance, not a copy; callers
 * update the counts through it.
 *
 * @return the live map of error category to count
 */
public Map<ValidationErrorCategory, Integer> getCategoryInstanceCounter() {
return categoryInstanceCounter;
}

/**
 * Returns the per-group instance counter of error categories.
 * <p>
 * The outer key is the group id and the inner key is the error category.
 * The returned map is this DAO's own mutable instance, not a copy:
 * ValidatorCli adds group entries and updates counts through it.
 *
 * @return the live map of group id to (error category to count)
 */
public Map<String, Map<ValidationErrorCategory, Integer>> getCategoryInstanceCounterGroupped() {
return categoryInstanceCounterGroupped;
}
}
78 changes: 30 additions & 48 deletions src/main/java/de/gwdg/metadataqa/marc/cli/ValidatorCli.java
Original file line number Diff line number Diff line change
Expand Up @@ -198,25 +198,15 @@ public void processRecord(BibliographicRecord bibliographicRecord, int i) {
processDetails(bibliographicRecord, validator);
} else {
if (parameters.doSummary()) {
updateTotalRecordCOunter(0, groupIds);
// TODO: use enum instead
updateCounters(0, groupIds, validatorDAO.getTotalRecordCounter(), validatorDAO.getTotalRecordCounterGroupped());
}
}
if (parameters.collectAllErrors())
allValidationErrors.addAll(validator.getValidationErrors());
counter++;
}

/**
 * Counts one occurrence of {@code key} in the total record counter.
 * <p>
 * Without grouping, the ungrouped counter is updated. With grouping, the
 * counter of every group in {@code groupIds} is updated instead, and a
 * group's counter map is created the first time that group is seen.
 *
 * @param key      the counter key to register
 * @param groupIds ids of the groups the current record belongs to; only
 *                 read when grouping is enabled
 */
private void updateTotalRecordCOunter(int key, Set<String> groupIds) {
  if (!doGroups()) {
    count(key, validatorDAO.getTotalRecordCounter());
    return;
  }

  for (String currentGroup : groupIds) {
    // computeIfAbsent hands back the existing or the freshly created per-group map
    count(key, validatorDAO.getTotalRecordCounterGroupped()
      .computeIfAbsent(currentGroup, unused -> new TreeMap<>()));
  }
}

private void processDetails(BibliographicRecord marcRecord, Validator validator) {
List<ValidationError> errors = validator.getValidationErrors();
if (!errors.isEmpty()) {
Expand Down Expand Up @@ -271,43 +261,29 @@ private void processSummary(BibliographicRecord marcRecord,
count(error, validatorDAO.getInstanceBasedErrorCounterGroupped().get(groupId));
}

count(error.getType(), validatorDAO.getTypeInstanceCounter());
count(error.getType().getCategory(), validatorDAO.getCategoryInstanceCounter());
updateCounters(error.getType(), groupIds, validatorDAO.getTypeInstanceCounter(), validatorDAO.getTypeInstanceCounterGroupped());
updateCounters(error.getType().getCategory(), groupIds, validatorDAO.getCategoryInstanceCounter(), validatorDAO.getCategoryInstanceCounterGroupped());

count(1, validatorDAO.getTotalInstanceCounter());
updateErrorCollector(marcRecord.getId(true), error.getId());
uniqueErrors.add(error.getId());
uniqueTypes.add(error.getType());
uniqueCategories.add(error.getType().getCategory());
}

for (Integer id : uniqueErrors) {
count(id, validatorDAO.getRecordBasedErrorCounter());
for (Integer errorId : uniqueErrors) {
updateCounters(errorId, groupIds, validatorDAO.getRecordBasedErrorCounter(), validatorDAO.getRecordBasedErrorCounterGroupped());
}
for (ValidationErrorType errorType : uniqueTypes) {
if (this.doGroups()) {
for (String groupId : groupIds) {
validatorDAO.getTypeRecordCounterGroupped().computeIfAbsent(groupId, s -> new TreeMap<>());
count(errorType, validatorDAO.getTypeRecordCounterGroupped().get(groupId));
}
} else {
count(errorType, validatorDAO.getTypeRecordCounter());
}
updateCounters(errorType, groupIds, validatorDAO.getTypeRecordCounter(), validatorDAO.getTypeRecordCounterGroupped());
}
for (ValidationErrorCategory errorCategory : uniqueCategories) {
if (this.doGroups()) {
for (String groupId : groupIds) {
validatorDAO.getCategoryRecordCounterGroupped().computeIfAbsent(groupId, s -> new TreeMap<>());
count(errorCategory, validatorDAO.getCategoryRecordCounterGroupped().get(groupId));
}
} else {
count(errorCategory, validatorDAO.getCategoryRecordCounter());
}
updateCounters(errorCategory, groupIds, validatorDAO.getCategoryRecordCounter(), validatorDAO.getCategoryRecordCounterGroupped());
}
updateTotalRecordCOunter(1, groupIds);
// count(1, validatorDAO.getTotalRecordCounter());

updateCounters(1, groupIds, validatorDAO.getTotalRecordCounter(), validatorDAO.getTotalRecordCounterGroupped());
if (!allButInvalidFieldErrors.isEmpty())
updateTotalRecordCOunter(2, groupIds);
// count(2, validatorDAO.getTotalRecordCounter());
updateCounters(2, groupIds, validatorDAO.getTotalRecordCounter(), validatorDAO.getTotalRecordCounterGroupped());
}

@Override
Expand All @@ -318,9 +294,6 @@ public void afterIteration(int numberOfprocessedRecords) {

char separator = getSeparator();
if (parameters.doSummary()) {
logger.info("printSummary");
logger.info("printCategoryCounts");
logger.info("printTypeCounts");
if (doGroups()) {
printSummaryGroupped(separator);
printCategoryCountsGroupped();
Expand All @@ -332,8 +305,6 @@ public void afterIteration(int numberOfprocessedRecords) {
printTypeCounts();
printTotalCounts();
}
logger.info("printTotalCounts");
logger.info("printCollector");
printCollector();
}
logger.info("all printing is DONE");
Expand All @@ -357,7 +328,7 @@ private void printCollector() {
}

private void printSummary(char separator) {
String header = ValidationErrorFormatter.formatHeaderForSummary(parameters.getFormat(), this.doGroups());
String header = ValidationErrorFormatter.formatHeaderForSummary(parameters.getFormat(), doGroups());
print(summaryFile, header);
validatorDAO.getInstanceBasedErrorCounter()
.entrySet()
Expand Down Expand Up @@ -388,7 +359,7 @@ private void printSummary(char separator) {
}

private void printSummaryGroupped(char separator) {
String header = ValidationErrorFormatter.formatHeaderForSummary(parameters.getFormat(), this.doGroups());
String header = ValidationErrorFormatter.formatHeaderForSummary(parameters.getFormat(), doGroups());
print(summaryFile, header);
validatorDAO.getInstanceBasedErrorCounterGroupped()
.entrySet()
Expand All @@ -405,8 +376,8 @@ private void printSummaryGroupped(char separator) {
Integer typeIdB = Integer.valueOf(b.getKey().getType().getId());
int result = typeIdA.compareTo(typeIdB);
if (result == 0) {
Integer recordCountA = validatorDAO.getRecordBasedErrorCounter().get(a.getKey().getId());
Integer recordCountB = validatorDAO.getRecordBasedErrorCounter().get(b.getKey().getId());
Integer recordCountA = validatorDAO.getRecordBasedErrorCounterGroupped().get(groupId).get(a.getKey().getId());
Integer recordCountB = validatorDAO.getRecordBasedErrorCounterGroupped().get(groupId).get(b.getKey().getId());
result = recordCountB.compareTo(recordCountA);
}
return result;
Expand All @@ -419,7 +390,7 @@ private void printSummaryGroupped(char separator) {
cells.add(groupId);
cells.add(error.getId());
cells.addAll(Arrays.asList(ValidationErrorFormatter.asArrayWithoutId(error)));
cells.addAll(Arrays.asList(instanceCount, validatorDAO.getRecordBasedErrorCounter().get(error.getId())));
cells.addAll(Arrays.asList(instanceCount, validatorDAO.getRecordBasedErrorCounterGroupped().get(groupId).get(error.getId())));
// TODO: separator
print(summaryFile, CsvUtils.createCsv(cells));
});
Expand Down Expand Up @@ -467,7 +438,7 @@ private void printTypeCountsGroupped() {
.forEach(entry -> {
ValidationErrorType type = entry.getKey();
int records = entry.getValue();
int instances = validatorDAO.getTypeInstanceCounter().get(entry.getKey());
int instances = validatorDAO.getTypeInstanceCounterGroupped().get(groupId).get(entry.getKey());
try {
writer.write(CsvUtils.createCsv(groupId, type.getId(), type.getCategory().getId(), type.getCategory().getName(), type.getMessage(), instances, records));
} catch (IOException e) {
Expand Down Expand Up @@ -573,7 +544,7 @@ private void printCategoryCountsGroupped() {
.forEach(entry -> {
ValidationErrorCategory category = entry.getKey();
int records = entry.getValue();
int instances = validatorDAO.getCategoryInstanceCounter().getOrDefault(entry.getKey(), -1);
int instances = validatorDAO.getCategoryInstanceCounterGroupped().get(groupId).getOrDefault(entry.getKey(), -1);
try {
writer.write(CsvUtils.createCsv(groupId, category.getId(), category.getName(), instances, records));
} catch (IOException e) {
Expand Down Expand Up @@ -672,6 +643,17 @@ public ValidatorConfiguration getValidityConfiguration() {
return validatorConfiguration;
}

/**
 * Counts one occurrence of {@code key}, either per group or globally.
 * <p>
 * When grouping is enabled, {@code key} is passed to {@code count(...)} once
 * for every group id in {@code groupIds}, against that group's map in
 * {@code counterGroupped}; a missing group map is created on first use.
 * When grouping is disabled, only {@code counterSingle} is updated and
 * {@code groupIds} is not read.
 *
 * @param key             the counter key (callers pass Integer codes, error
 *                        ids, error types and error categories)
 * @param groupIds        ids of the groups the current record belongs to
 * @param counterSingle   the ungrouped counter, used when grouping is off
 * @param counterGroupped the per-group counters, keyed by group id
 * @param <T>             the key type of the counters
 */
private <T> void updateCounters(T key,
                                Set<String> groupIds,
                                Map<T, Integer> counterSingle,
                                Map<String, Map<T, Integer>> counterGroupped) {
  if (!doGroups()) {
    count(key, counterSingle);
    return;
  }

  for (String groupId : groupIds) {
    // Per-group maps are TreeMaps, so keys must be mutually comparable at runtime.
    // computeIfAbsent returns the existing or new map, avoiding a second lookup.
    count(key, counterGroupped.computeIfAbsent(groupId, s -> new TreeMap<>()));
  }
}

private class Counter {
int id;
int count;
Expand Down
17 changes: 9 additions & 8 deletions src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,24 +167,25 @@ public void validate_pica_groupBy() throws Exception {

} else if (outputFile.equals("issue-summary.csv")) {
String all = StringUtils.join(lines, "\n");
// System.err.println(all);
assertEquals(1058, lines.size());
assertEquals("groupId,id,MarcPath,categoryId,typeId,type,message,url,instances,records", lines.get(0).trim());
assertTrue(Pattern.compile("100,\\d,001@,3,9,undefined field,001@,,1,10").matcher(all).find());
assertTrue(Pattern.compile("100,\\d,001U,3,9,undefined field,001U,,1,10").matcher(all).find());
assertTrue(Pattern.compile("100,\\d+,044K/00-09,5,13,undefined subfield,V,https://format.k10plus.de/k10plushelp.pl\\?cmd=kat&katalog=Standard&val=5550-5559,1,7").matcher(all).find());
assertTrue(Pattern.compile("100,\\d+,044K/00-09,5,13,undefined subfield,3,https://format.k10plus.de/k10plushelp.pl\\?cmd=kat&katalog=Standard&val=5550-5559,1,7").matcher(all).find());
assertTrue(Pattern.compile("100,\\d,001@,3,9,undefined field,001@,,1,1").matcher(all).find());
assertTrue(Pattern.compile("100,\\d,001U,3,9,undefined field,001U,,1,1").matcher(all).find());
assertTrue(Pattern.compile("100,\\d+,044K/00-09,5,13,undefined subfield,V,https://format.k10plus.de/k10plushelp.pl\\?cmd=kat&katalog=Standard&val=5550-5559,1,1").matcher(all).find());
assertTrue(Pattern.compile("100,\\d+,044K/00-09,5,13,undefined subfield,3,https://format.k10plus.de/k10plushelp.pl\\?cmd=kat&katalog=Standard&val=5550-5559,1,1").matcher(all).find());

} else if (outputFile.equals("issue-by-category.csv")) {
assertEquals(94, lines.size());
assertEquals("groupId,id,category,instances,records", lines.get(0).trim());
assertEquals("100,3,data field,22,1", lines.get(1).trim());
assertEquals("100,5,subfield,157,1", lines.get(2).trim());
assertEquals("100,3,data field,2,1", lines.get(1).trim());
assertEquals("100,5,subfield,20,1", lines.get(2).trim());

} else if (outputFile.equals("issue-by-type.csv")) {
assertEquals(108, lines.size());
assertEquals("groupId,id,categoryId,category,type,instances,records", lines.get(0).trim());
assertEquals("100,9,3,data field,undefined field,21,1", lines.get(1).trim());
assertEquals("100,13,5,subfield,undefined subfield,156,1", lines.get(2).trim());
assertEquals("100,9,3,data field,undefined field,2,1", lines.get(1).trim());
assertEquals("100,13,5,subfield,undefined subfield,20,1", lines.get(2).trim());

} else if (outputFile.equals("issue-collector.csv")) {
assertEquals(59, lines.size());
Expand Down

0 comments on commit 9429939

Please sign in to comment.