OHDSI · MaximMoinat · Jun 10, 2020 · Jan 25, 2020 · Jan 25, 2020 · Apr 14, 2020
diff --git a/docs/index.html b/docs/index.html
@@ -357,7 +357,7 @@ <h1>Features</h1>
 </div>
 <div id="current-version" class="section level1">
 <h1>Current version</h1>
-<p><a href="https://github.com/OHDSI/WhiteRabbit/releases/tag/v0.9.0"><strong>v0.9.0</strong></a></p>
+<p><a href="https://github.com/OHDSI/WhiteRabbit/releases/latest"><strong>v0.10.1</strong></a></p>
 </div>
 
 

diff --git a/docs/index.md b/docs/index.md
@@ -21,4 +21,4 @@ It comes with **RabbitInAHat**, an application for interactive design of an ETL
-- Rabbit in a Hat generates ETL specification document according to OMOP templatement according to OMOP template
+- Rabbit in a Hat generates ETL specification document according to OMOP template
 
 # Current version
-[**v0.9.0**](https://github.com/OHDSI/WhiteRabbit/releases/tag/v0.9.0)
+[**v0.10.1**](https://github.com/OHDSI/WhiteRabbit/releases/latest)
diff --git a/examples.zip b/examples.zip
diff --git a/iniFileExamples/WhiteRabbit.ini b/iniFileExamples/WhiteRabbit.ini
@@ -7,7 +7,7 @@ PASSWORD = supersecret                        # Password for the database
 DATABASE_NAME = schema_name                   # Name of the data schema used 
 DELIMITER = ,                                 # The delimiter that separates values
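-TABLES_TO_SCAN = *                            # Comma-delimited list of table names to scan. Use "*" (asterix) to include all tables in the database
+TABLES_TO_SCAN = *                            # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database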
-SCAN_FIELD_VALUES = yes                       # Include a frequency count of field values in the scan report? "yes" or "no"
+SCAN_FIELD_VALUES = yes                       # Include the frequency of field values in the scan report? "yes" or "no"
 MIN_CELL_COUNT = 5                            # Minimum frequency for a field value to be included in the report
 MAX_DISTINCT_VALUES = 1000                    # Maximum number of distinct values per field to be reported
 ROWS_PER_TABLE = 100000                       # Maximum number of rows per table to be scanned for field values

diff --git a/pom.xml b/pom.xml
@@ -6,7 +6,7 @@
     <groupId>org.ohdsi</groupId>
     <artifactId>leporidae</artifactId>
     <packaging>pom</packaging>
-    <version>0.10.0</version>
+    <version>0.10.1</version>
     <modules>
         <module>rabbitinahat</module>
         <module>whiterabbit</module>

diff --git a/rabbit-core/pom.xml b/rabbit-core/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>leporidae</artifactId>
         <groupId>org.ohdsi</groupId>
-        <version>0.10.0</version>
+        <version>0.10.1</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 

diff --git a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Database.java b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Database.java
@@ -72,6 +72,10 @@ public void setTables(List<Table> tables) {
 		this.tables = tables;
 	}
 
+	public void addTable(Table table) {
+		this.tables.add(table);
+	}
+
 	public String getDbName() {
 		return dbName;
 	}
@@ -138,11 +142,14 @@ public static Database generateModelFromScanReport(String filename) {
 		Database database = new Database();
 		QuickAndDirtyXlsxReader workbook = new QuickAndDirtyXlsxReader(filename);
 
-		// Create table lookup from tables overview, if exists
+		// Create table lookup from tables overview, if it exists
 		Map<String, Table> nameToTable = createTablesFromTableOverview(workbook, database);
 
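-		// Field overview is the first sheet
+		// Field overview is looked up by sheet name; fall back to the first sheet if no sheet has that name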
-		Sheet overviewSheet = workbook.get(0);
+		Sheet overviewSheet = workbook.getByName(ScanSheetName.FIELD_OVERVIEW);
+		if (overviewSheet == null) {
+			overviewSheet = workbook.get(0);
+		}
 		Iterator<QuickAndDirtyXlsxReader.Row> overviewRows = overviewSheet.iterator();
 
 		overviewRows.next();  // Skip header
@@ -168,11 +175,12 @@ public static Database generateModelFromScanReport(String filename) {
 				String fieldName = row.getStringByHeaderName(ScanFieldName.FIELD);
 				Field field = new Field(fieldName.toLowerCase(), table);
 
-				String fractionEmpty = row.getByHeaderName(ScanFieldName.FRACTION_EMPTY);
-				field.setNullable(fractionEmpty == null || !fractionEmpty.equals("0"));
 				field.setType(row.getByHeaderName(ScanFieldName.TYPE));
 				field.setMaxLength(row.getIntByHeaderName(ScanFieldName.MAX_LENGTH));
 				field.setDescription(row.getStringByHeaderName(ScanFieldName.DESCRIPTION));
+				field.setFractionEmpty(row.getDoubleByHeaderName(ScanFieldName.FRACTION_EMPTY));
+				field.setUniqueCount(row.getIntByHeaderName(ScanFieldName.UNIQUE_COUNT));
+				field.setFractionUnique(row.getDoubleByHeaderName(ScanFieldName.FRACTION_UNIQUE));
 				field.setValueCounts(getValueCounts(workbook, tableName, fieldName));
 
 				table.getFields().add(field);
@@ -186,18 +194,13 @@ public static Table createTable(String name, String description, Integer nRows,
 		Table table = new Table();
 		table.setName(name.toLowerCase());
 		table.setDescription(description);
-		table.setRowCount((nRows == null || nRows == -1) ? nRowsChecked : nRows);
+		table.setRowCount(nRows == null ? -1 : nRows);
+		table.setRowsCheckedCount(nRowsChecked == null ? -1 : nRowsChecked);
 		return table;
 	}
 
 	public static Map<String, Table> createTablesFromTableOverview(QuickAndDirtyXlsxReader workbook, Database database) {
-		Sheet tableOverviewSheet = null;
-		for (Sheet sheet : workbook) {
-			if (sheet.getName().equals(ScanSheetName.TABLE_OVERVIEW)) {
-				tableOverviewSheet = sheet;
-				break;
-			}
-		}
+		Sheet tableOverviewSheet = workbook.getByName(ScanSheetName.TABLE_OVERVIEW);
 
 		if (tableOverviewSheet == null) { // No table overview sheet, empty nameToTable
 			return new HashMap<>();
@@ -224,7 +227,7 @@ public static Map<String, Table> createTablesFromTableOverview(QuickAndDirtyXlsx
 		return nameToTable;
 	}
 
-	private static String[][] getValueCounts(QuickAndDirtyXlsxReader workbook, String tableName, String fieldName) {
+	private static ValueCounts getValueCounts(QuickAndDirtyXlsxReader workbook, String tableName, String fieldName) {
 		Sheet tableSheet = null;
 		String targetSheetName = Table.createSheetNameFromTableName(tableName);
 		for (Sheet sheet : workbook) {
@@ -233,29 +236,43 @@ private static String[][] getValueCounts(QuickAndDirtyXlsxReader workbook, Strin
 				break;
 			}
 		}
-		if (tableSheet == null) // Sheet not found for table, return empty array
-			return new String[0][0];
+
+		// Sheet not found for table, return empty
+		if (tableSheet == null) {
+			return new ValueCounts();
+		}
 
 		Iterator<org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row> iterator = tableSheet.iterator();
 		org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row header = iterator.next();
 		int index = header.indexOf(fieldName);
-		List<String[]> list = new ArrayList<String[]>();
+
+		ValueCounts valueCounts = new ValueCounts();
 		if (index != -1) // Could happen when people manually delete columns
 			while (iterator.hasNext()) {
 				org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row row = iterator.next();
 				if (row.size() > index) {
 					String value = row.get(index);
 					String count;
-					if (row.size() > index + 1)
+
+					if (row.size() > index + 1) {
 						count = row.get(index + 1);
-					else
+					} else {
 						count = "";
-					if (value.equals("") && count.equals(""))
+					}
+
+					if (value.equals("") && count.equals("")) {
 						break;
-					list.add(new String[] { value, count });
+					}
+
+					// If the count is not a number, ignore this row
+					try {
+						valueCounts.add(value, (int) (Double.parseDouble(count)));
+					} catch (NumberFormatException e) {
+						// Deliberately skipped: a non-numeric count cannot be stored as an integer frequency
+					}
 				}
 			}
-		return list.toArray(new String[list.size()][2]);
+		return valueCounts;
 	}
 
 }
diff --git a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Field.java b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Field.java
@@ -25,13 +25,16 @@ public class Field implements MappableItem {
 	private Table				table;
 	private String				name;
 	private String				comment				= "";
-	private String[][]			valueCounts;
+	private ValueCounts			valueCounts = new ValueCounts();
 	private boolean				isNullable;
 	private String				type;
 	private String				description			= "";
 	private Integer				maxLength;
 	private boolean				isStem;
 	private List<ConceptsMap.Concept> conceptIdHints;
+	private Double				fractionEmpty;
+	private Integer				uniqueCount;
+	private Double				fractionUnique;
 
 	public Field(String name, Table table) {
 		this.table = table;
@@ -66,11 +69,11 @@ public void setName(String name) {
 		this.name = name;
 	}
 
-	public String[][] getValueCounts() {
+	public ValueCounts getValueCounts() {
 		return valueCounts;
 	}
 
-	public void setValueCounts(String[][] valueCounts) {
+	public void setValueCounts(ValueCounts valueCounts) {
 		this.valueCounts = valueCounts;
 	}
 
@@ -137,4 +140,29 @@ public List<ConceptsMap.Concept> getConceptIdHints() {
 	public void setConceptIdHints(List<ConceptsMap.Concept> conceptIdHints) {
 		this.conceptIdHints = conceptIdHints;
 	}
+
+	public Double getFractionEmpty() {
+		return fractionEmpty;
+	}
+
+	public void setFractionEmpty(Double fractionEmpty) {
+		this.fractionEmpty = fractionEmpty;
+		this.setNullable(fractionEmpty == null || fractionEmpty != 0);
+	}
+
+	public Integer getUniqueCount() {
+		return uniqueCount;
+	}
+
+	public void setUniqueCount(Integer uniqueCount) {
+		this.uniqueCount = uniqueCount;
+	}
+
+	public Double getFractionUnique() {
+		return fractionUnique;
+	}
+
+	public void setFractionUnique(Double fractionUnique) {
+		this.fractionUnique = fractionUnique;
+	}
 }
diff --git a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Table.java b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/Table.java
@@ -96,6 +96,10 @@ public void setRowsCheckedCount(int rowsCheckedCount) {
 		this.rowsCheckedCount = rowsCheckedCount;
 	}
 
+	public void addField(Field field) {
+		this.fields.add(field);
+	}
+
 	public List<Field> getFields() {
 		return fields;
 	}

diff --git a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/ValueCounts.java b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/ValueCounts.java
@@ -0,0 +1,81 @@
+/*******************************************************************************
+ * Copyright 2020 Observational Health Data Sciences and Informatics
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.rabbitInAHat.dataModel;
+
+import java.util.ArrayList;
+
+/** Ordered list of (value, frequency) pairs for one field, plus the running sum of all added frequencies. */
+public class ValueCounts {
+    private final ArrayList<ValueCounts.ValueCount> valueCounts = new ArrayList<>();
+    private int totalFrequency = 0;
+
+    /** One value with its frequency; static so an entry holds no hidden reference to the enclosing ValueCounts. */
+    public static class ValueCount {
+        private String value;
+        private final int frequency;
+        public ValueCount(String value, int frequency) {
+            this.value = value;
+            this.frequency = frequency;
+        }
+
+        public String getValue() {
+            return value;
+        }
+
+        public void setValue(String value) {
+            this.value = value;
+        }
+
+        public int getFrequency() {
+            return frequency;
+        }
+    }
+
+    /** Appends the pair and adds its frequency to the running total; returns the result of ArrayList.add. */
+    public boolean add(String value, int frequency) {
+        totalFrequency += frequency;
+        return valueCounts.add(new ValueCount(value, frequency));
+    }
+
+    /** Returns the backing list itself (not a copy), in insertion order. */
+    public ArrayList<ValueCounts.ValueCount> getAll() {
+        return valueCounts;
+    }
+
+    public ValueCounts.ValueCount get(int i) {
+        return valueCounts.get(i);
+    }
+
+    /** Returns the first added value, or null when empty; assumes the scan report lists the most frequent value first. */
+    public String getMostFrequentValue() {
+        return valueCounts.isEmpty() ? null : valueCounts.get(0).getValue();
+    }
+
+    /** Returns the sum of the frequencies of all pairs added so far. */
+    public int getTotalFrequency() {
+        return totalFrequency;
+    }
+
+    public int size() {
+        return valueCounts.size();
+    }
+
+    public boolean isEmpty() {
+        return valueCounts.isEmpty();
+    }
+}
diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java b/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java
@@ -40,10 +40,11 @@ public class QuickAndDirtyXlsxReader extends ArrayList<Sheet> {
 
 	private static final long	serialVersionUID	= 25124428448185386L;
 
-	private List<String>		sharedStrings		= new ArrayList<String>();
+	private List<String> sharedStrings = new ArrayList<>();
 
-	private Map<String, Sheet>	rIdToSheet			= new HashMap<String, Sheet>();
-	private Map<String, Sheet>	filenameToSheet		= new HashMap<String, Sheet>();
+	private Map<String, Sheet> rIdToSheet = new HashMap<>();
+	private Map<String, Sheet> nameToSheet = new HashMap<>();
+	private Map<String, Sheet> filenameToSheet = new HashMap<>();
 
 	public QuickAndDirtyXlsxReader(String filename) {
 		try {
@@ -56,17 +57,10 @@ public QuickAndDirtyXlsxReader(String filename) {
 			readFromStream(inputStream);
 
 			// Step 3: order the sheets:
-			Collections.sort(this, new Comparator<Sheet>() {
-
-				@Override
-				public int compare(Sheet o1, Sheet o2) {
-					return IntegerComparator.compare(o1.order, o2.order);
-				}
-			});
+			Collections.sort(this, (o1, o2) -> IntegerComparator.compare(o1.order, o2.order));
 		} catch (FileNotFoundException e) {
 			e.printStackTrace();
 		}
-
 	}
 
 	private void loadSharedStringsAndRels(FileInputStream inputStream) {
@@ -191,7 +185,7 @@ else if (tag.equals("/v") || tag.equals("/t")) {
 								result.add("");
 							if (sharedString) {
 								int index = Integer.parseInt(string.substring(stringStart, tagStart - 1));
-								result.set(column, sharedStrings.get(index));
+								result.set(column, decode(sharedStrings.get(index)));
 							} else
 								result.set(column, decode(string.substring(stringStart, tagStart - 1)));
 						}
@@ -521,10 +515,18 @@ private void processWorkBook(InputStream inputStream) throws NumberFormatExcepti
 				Sheet sheet = rIdToSheet.get(rId);
 				sheet.setName(name);
 				sheet.order = Integer.parseInt(order);
+				nameToSheet.put(name, sheet);
 			}
 		}
 	}
 
+	/** Looks up a sheet by the name registered while processing the workbook; returns null when no sheet has that name. */
+	public Sheet getByName(String sheetName) {
+		// Map.get yields null for an absent key, so a separate containsKey check is not needed
+		Sheet match = nameToSheet.get(sheetName);
+		return match;
+	}
+
 	public class Sheet extends ArrayList<Row> {
 		private static final long	serialVersionUID	= -8597151681911998153L;
 		private String				name;
@@ -590,6 +592,7 @@ public String getStringByHeaderName(String fieldName) {
 		public Double getDoubleByHeaderName(String fieldName) {
 			String value = getStringByHeaderName(fieldName);
 			if (value != null) {
+				value = value.replace("<=","").trim();
 				return Double.parseDouble(value);
 			} else {
 				return null;