diff --git a/src/main/java/network/brightspots/rcv/DominionCvrReader.java b/src/main/java/network/brightspots/rcv/DominionCvrReader.java index 8077b269..b298b569 100644 --- a/src/main/java/network/brightspots/rcv/DominionCvrReader.java +++ b/src/main/java/network/brightspots/rcv/DominionCvrReader.java @@ -17,6 +17,7 @@ package network.brightspots.rcv; import java.io.File; +import java.io.FileNotFoundException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -38,6 +39,7 @@ class DominionCvrReader { private static final String CANDIDATE_MANIFEST = "CandidateManifest.json"; private static final String CONTEST_MANIFEST = "ContestManifest.json"; private static final String CVR_EXPORT = "CvrExport.json"; + private static final String CVR_EXPORT_PATTERN = "CvrExport_%d.json"; private final ContestConfig config; private final String manifestFolder; private final String undeclaredWriteInLabel; @@ -161,9 +163,8 @@ void readCastVoteRecords(List castVoteRecords, String contestId) Logger.severe("No candidate data found!"); throw new CvrParseException(); } - // parse the cvr - Path cvrPath = Paths.get(manifestFolder, CVR_EXPORT); - parseCvrFile(cvrPath.toString(), castVoteRecords, contestId); + // parse the cvr file(s) + gatherCvrsForContest(castVoteRecords, contestId); if (castVoteRecords.isEmpty()) { Logger.severe("No cast vote record data found!"); throw new CvrParseException(); @@ -173,9 +174,8 @@ void readCastVoteRecords(List castVoteRecords, String contestId) } } - // parse the given file into a List of CastVoteRecords for tabulation - private void parseCvrFile( - String filePath, List castVoteRecords, String contestIdToLoad) { + // parse the CVR file or files into a List of CastVoteRecords for tabulation + private void gatherCvrsForContest(List castVoteRecords, String contestIdToLoad) { // build a lookup map for candidates codes to optimize Cvr parsing Map> contestIdToCandidateCodes = new HashMap<>(); for (Candidate candidate : this.candidates) { @@ -190,58 +190,100 @@ private void parseCvrFile( } try { - HashMap json = JsonParser.readFromFile(filePath, HashMap.class); - // top-level "Sessions" object contains a lists of Cvr objects from different tabulators - ArrayList sessions = (ArrayList) json.get("Sessions"); - // for each Cvr object extract various fields - for (Object sessionObject : sessions) { - HashMap session = (HashMap) sessionObject; - // extract various ids - String tabulatorId = session.get("TabulatorId").toString(); - String batchId = session.get("BatchId").toString(); - Integer recordId = (Integer) session.get("RecordId"); - String suppliedId = recordId.toString(); - // filter out records which are not current and replace them with adjudicated ones - HashMap adjudicatedData = (HashMap) session.get("Original"); - boolean isCurrent = (boolean) adjudicatedData.get("IsCurrent"); - if (!isCurrent) { - if (session.containsKey("Modified")) { - adjudicatedData = (HashMap) session.get("Modified"); - } else { - Logger.warning( - "CVR has no adjudicated rankings, skipping: " - + "Tabulator ID: %s Batch ID: %s Record ID: %d", - tabulatorId, batchId, recordId); - continue; + Path singleCvrPath = Paths.get(manifestFolder, CVR_EXPORT); + Path firstCvrPath = Paths.get(manifestFolder, String.format(CVR_EXPORT_PATTERN, 1)); + if (singleCvrPath.toFile().exists()) { + HashMap json = JsonParser.readFromFile(singleCvrPath.toString(), HashMap.class); + parseCvrFile(json, castVoteRecords, contestIdToLoad, contestIdToCandidateCodes); + } else if (firstCvrPath.toFile().exists()) { + int recordsParsed = 0; + int recordsParsedAtLastlog = 0; + int cvrSequence = 1; + Path cvrPath = Paths.get(manifestFolder, String.format(CVR_EXPORT_PATTERN, cvrSequence)); + while (cvrPath.toFile().exists()) { + HashMap json = JsonParser.readFromFile(cvrPath.toString(), HashMap.class); + recordsParsed += parseCvrFile(json, castVoteRecords, contestIdToLoad, + contestIdToCandidateCodes); + if (recordsParsed - recordsParsedAtLastlog > 50000) { + Logger.info("Parsed %d records from %d files", recordsParsed, cvrSequence); + recordsParsedAtLastlog = recordsParsed; } + cvrSequence++; + cvrPath = Paths.get(manifestFolder, String.format(CVR_EXPORT_PATTERN, cvrSequence)); } - // validate precinct (may not exist for older data sets) - Integer precinctId = (Integer) adjudicatedData.get("PrecinctId"); - if (precinctId != null - && (this.precincts == null || !this.precincts.containsKey(precinctId))) { - Logger.severe("Precinct ID \"%d\" from CVR not found in manifest data!", precinctId); - throw new CvrParseException(); - } - String precinct = this.precincts != null ? this.precincts.get(precinctId) : null; - // validate precinct portion - Integer precinctPortionId = (Integer) adjudicatedData.get("PrecinctPortionId"); - if (precinctPortionId != null && !this.precinctPortions.containsKey(precinctPortionId)) { - Logger.severe( - "Precinct portion ID \"%d\" from CVR not found in manifest data!", precinctPortionId); - throw new CvrParseException(); - } - String precinctPortion = this.precinctPortions.get(precinctPortionId); - String ballotTypeId = adjudicatedData.get("BallotTypeId").toString(); + } else { + throw new FileNotFoundException(String.format( + "Error parsing cast vote record: neither %s nor %s exists", + singleCvrPath.toString(), firstCvrPath.toString())); + } + } catch (FileNotFoundException | CvrParseException exception) { + Logger.severe("Error parsing cast vote record:\n%s", exception); + castVoteRecords.clear(); + } + } - ArrayList contests; - // sometimes there is a "Cards" object at this level - if (adjudicatedData.containsKey("Cards")) { - ArrayList cardsList = (ArrayList) adjudicatedData.get("Cards"); - HashMap cardsObject = (HashMap) cardsList.get(0); - contests = (ArrayList) cardsObject.get("Contests"); + private int parseCvrFile( + HashMap json, List castVoteRecords, String contestIdToLoad, + Map> contestIdToCandidateCodes) + throws CvrParseException { + // top-level "Sessions" object contains a lists of Cvr objects from different tabulators + ArrayList sessions = (ArrayList) json.get("Sessions"); + int recordsParsed = 0; + // for each Cvr object extract various fields + for (Object sessionObject : sessions) { + HashMap session = (HashMap) sessionObject; + // extract various ids + String tabulatorId = session.get("TabulatorId").toString(); + String batchId = session.get("BatchId").toString(); + Integer recordId = (Integer) session.get("RecordId"); + String suppliedId = recordId.toString(); + // filter out records which are not current and replace them with adjudicated ones + HashMap adjudicatedData = (HashMap) session.get("Original"); + boolean isCurrent = (boolean) adjudicatedData.get("IsCurrent"); + if (!isCurrent) { + if (session.containsKey("Modified")) { + adjudicatedData = (HashMap) session.get("Modified"); } else { - contests = (ArrayList) adjudicatedData.get("Contests"); + Logger.warning( + "CVR has no adjudicated rankings, skipping: " + + "Tabulator ID: %s Batch ID: %s Record ID: %d", + tabulatorId, batchId, recordId); + continue; } + } + // validate precinct (may not exist for older data sets) + Integer precinctId = (Integer) adjudicatedData.get("PrecinctId"); + if (precinctId != null + && (this.precincts == null || !this.precincts.containsKey(precinctId))) { + Logger.severe("Precinct ID \"%d\" from CVR not found in manifest data!", + precinctId); + throw new CvrParseException(); + } + String precinct = this.precincts != null ? this.precincts.get(precinctId) : null; + // validate precinct portion + Integer precinctPortionId = (Integer) adjudicatedData.get("PrecinctPortionId"); + if (precinctPortionId != null && !this.precinctPortions.containsKey(precinctPortionId)) { + Logger.severe( + "Precinct portion ID \"%d\" from CVR not found in manifest data!", + precinctPortionId); + throw new CvrParseException(); + } + String precinctPortion = this.precinctPortions.get(precinctPortionId); + String ballotTypeId = adjudicatedData.get("BallotTypeId").toString(); + + ArrayList cardsList; + // sometimes there is a "Cards" object at this level + if (adjudicatedData.containsKey("Cards")) { + cardsList = (ArrayList) adjudicatedData.get("Cards"); + } else { + ArrayList oneCardList = new ArrayList(1); + oneCardList.add(adjudicatedData); + cardsList = (ArrayList) oneCardList; + } + + for (Object cardObject : cardsList) { + HashMap card = (HashMap) cardObject; + ArrayList contests = (ArrayList) card.get("Contests"); // each contest object is a cvr for (Object contestObject : contests) { @@ -272,7 +314,8 @@ private void parseCvrFile( Set candidates = contestIdToCandidateCodes.get(contestId); if (!candidates.contains(candidateCode)) { Logger.severe( - "Candidate code '%s' is not valid for contest '%d'!", candidateCode, contestId); + "Candidate code '%s' is not valid for contest '%d'!", candidateCode, + contestId); throw new CvrParseException(); } // We also need to throw an error if this candidate doesn't appear in the tabulator's @@ -300,15 +343,14 @@ private void parseCvrFile( rankings); castVoteRecords.add(newCvr); } - // provide some user feedback on the Cvr count - if (castVoteRecords.size() % 50000 == 0) { - Logger.info("Parsed %d cast vote records.", castVoteRecords.size()); - } } - } catch (Exception exception) { - Logger.severe("Error parsing cast vote record:\n%s", exception); - castVoteRecords.clear(); + // provide some user feedback on the Cvr count + recordsParsed++; + if (recordsParsed > 0 && recordsParsed % 50000 == 0) { + Logger.info("Parsed %d cast vote records.", recordsParsed); + } } + return recordsParsed; } // Candidate data from a Dominion candidate manifest Json diff --git a/src/test/java/network/brightspots/rcv/TabulatorTests.java b/src/test/java/network/brightspots/rcv/TabulatorTests.java index 11672c22..84b8ecc7 100644 --- a/src/test/java/network/brightspots/rcv/TabulatorTests.java +++ b/src/test/java/network/brightspots/rcv/TabulatorTests.java @@ -278,6 +278,12 @@ void testDominionNoPrecinctData() { runTabulationTest("dominion_no_precinct_data"); } + @Test + @DisplayName("multi-cvr file dominion test") + void multiFileDominionTest() { + runTabulationTest("dominion_multi_file"); + } + @Test @DisplayName("test invalid params in config file") void invalidParamsTest() { diff --git a/src/test/resources/network/brightspots/rcv/test_data b/src/test/resources/network/brightspots/rcv/test_data index 05cef1e3..99486a50 160000 --- a/src/test/resources/network/brightspots/rcv/test_data +++ b/src/test/resources/network/brightspots/rcv/test_data @@ -1 +1 @@ -Subproject commit 05cef1e37b9f8ecbf0e8b1695816c7f0cd9f5487 +Subproject commit 99486a50a72f499da14f59269102d8e78551c724