diff --git a/.classpath b/.classpath index 0a1dadd..be8db2b 100644 --- a/.classpath +++ b/.classpath @@ -23,4 +23,4 @@ - + \ No newline at end of file diff --git a/.gitignore b/.gitignore index b83d222..37bf7bb 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target/ +/talend_component/.classpath \ No newline at end of file diff --git a/.project b/.project index 626f00a..c884c3a 100644 --- a/.project +++ b/.project @@ -5,6 +5,11 @@ + + org.eclipse.wst.common.project.facet.core.builder + + + org.eclipse.jdt.core.javabuilder @@ -19,5 +24,6 @@ org.eclipse.jdt.core.javanature org.eclipse.m2e.core.maven2Nature + org.eclipse.wst.common.project.facet.core.nature diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs index abec6ca..6e80039 100644 --- a/.settings/org.eclipse.jdt.core.prefs +++ b/.settings/org.eclipse.jdt.core.prefs @@ -1,5 +1,8 @@ eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5 -org.eclipse.jdt.core.compiler.compliance=1.5 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.5 +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/.settings/org.eclipse.wst.common.project.facet.core.xml b/.settings/org.eclipse.wst.common.project.facet.core.xml new file mode 100644 index 0000000..f4ef8aa --- /dev/null +++ b/.settings/org.eclipse.wst.common.project.facet.core.xml @@ -0,0 +1,4 @@ + + + + diff --git a/README.md b/README.md index 0bb35a0..4ab207e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,77 @@ # Talend component tHashRow -This component creates a hash value for a schema. +This component creates a hash value. 
All configured input columns will be concatenated with the given delimiter. The resulting string will be used to calculate the hash value. Every configured hash input manipulation will be applied before the hash is calculated and populated to the selected output column. + +# Basic Settings + +## Basic configuration +### Hash type +Algorithm that will be used to generate the hash + +### Hash output +Column of the output schema in which the hash value will be written + +## Hash input manipulation + +### Relevant fields + +|Column|Use|Trim|Case Sensitive| +|------|---|----|--------------| +|List of input columns| Check if column should be added to the hash| Check if column should be trimmed| Select if column should be upper case, lower case, case sensitive or not in use (e.g. in case of numeric values)| + +### Delimiter +Delimiter to separate the input values + +### Null replacement +Value that will be used to calculate the hash, if input value is null +- Example if replacement value is set to "#NULL#" + + COLUMN_1 = "Test" + + COLUMN_2 = null + + COLUMN_3 = 123 + + Hash Input results in "Test";#NULL#;123 + +### Fraction size (float) +Maximum precision of float values + +### Fraction size (double) +Maximum precision of double values + +### Number format +List of available number formats. Grouping is generally disabled. 
+ +### Date format +#### Format date as miliseconds +- if checked + + all date fields will be represented as miliseconds since unix epoch +- if unchecked + + all date fields will be represented in the given date format + +### Enable string quoting +String based fields will be surrounded with the given quotation mark + +### Cut of empty trialing hash input values +If checked all empty trailing values will be truncated before hash will be calculated +- Example without quoting + + Hash Input = CUSTOMER A;1234;STREET 1;;; + + results in CUSTOMER A;1234;STREET 1 +- Example with quoting + + Hash Input = "CUSTOMER A";1234;"STREET 1";"";;"" + + results also in CUSTOMER A;1234;STREET 1 + +# Advanced Settings + +## Hash output manipulation +### Modify hash output +If all input values are null, the hash value will be replaced with the given value +- Example 1 + + checked and value is set to "22222222222222222222222222222222" + + Hash input = ;;;;; + + Hash value = "22222222222222222222222222222222" +- Example 2 + + unchecked + + Hash input = ;;;;; + + Hash value = 8f0158355357e8302939ea687dba9363 + +## Additional settings +### Show hash input +If checked the hash input (concatenation of all input values) will be exposed to the selected column + diff --git a/pom.xml b/pom.xml index 6cf25d3..6795c28 100644 --- a/pom.xml +++ b/pom.xml @@ -1,12 +1,9 @@ 4.0.0 - - Jan Lolling - - de.jlo.talendcomp - jlo-talendcomp-hash - 1.2 + de.robr.talendcomp + thashrow + 1.4 jar talendcomp_tHash @@ -23,24 +20,5 @@ test - - - - de.cimt.talendcomp - cimt-talendcomp-maven-plugin - 1.8 - - - tHashRow - - component - - - tHashRow - - - - - - + \ No newline at end of file diff --git a/src/main/java/de/cimt/talendcomp/checksum/HashBuilder.java b/src/main/java/de/cimt/talendcomp/checksum/HashBuilder.java deleted file mode 100644 index 25a1aa1..0000000 --- a/src/main/java/de/cimt/talendcomp/checksum/HashBuilder.java +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Copyright 2015 Jan Lolling 
jan.lolling@gmail.com - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package de.cimt.talendcomp.checksum; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.charset.Charset; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.text.DecimalFormat; -import java.text.NumberFormat; -import java.util.Date; -import java.util.Locale; - -public class HashBuilder { - - private final StringBuilder content = new StringBuilder(); - private boolean caseInsensitive = false; - private final DecimalFormat nf; - private static Charset cs = null; - private final MessageDigest mDigest; - private boolean allNull = true; - private String nullReplacement = "null"; - - private HashBuilder(String hashMethod) throws NoSuchAlgorithmException { - nf = (DecimalFormat) NumberFormat.getNumberInstance(Locale.ENGLISH); - nf.setGroupingUsed(false); - if (cs == null) { - cs = Charset.forName("UTF-8"); - } - mDigest = MessageDigest.getInstance(hashMethod); - } - - public static HashBuilder getMD5HashBuilder() throws NoSuchAlgorithmException { - return new HashBuilder("MD5"); - } - - public static HashBuilder getSHA1HashBuilder() throws NoSuchAlgorithmException { - return new HashBuilder("SHA1"); - } - - public static HashBuilder getSHA256HashBuilder() throws NoSuchAlgorithmException { - return new HashBuilder("SHA-256"); - } - - public HashBuilder reset() { - content.setLength(0); - allNull = true; - return this; 
- } - - public HashBuilder caseInsensitive() { - caseInsensitive = true; - return this; - } - - public HashBuilder add(final String value) { - if (value == null) { - content.append(nullReplacement); - } else { - if (caseInsensitive) { - content.append(value.trim().toLowerCase(Locale.ENGLISH)); - } else { - content.append(value.trim()); - } - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Integer value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Long value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Short value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Float value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Double value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final BigDecimal value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final BigInteger value) { - if (value == null) { - content.append(nullReplacement); - } else { - content.append(nf.format(value)); - allNull = false; - } - content.append("|"); - return this; - } - - public HashBuilder add(final Date value) { - if 
/**
 * Static helpers that compute a message digest of a string and return it as a
 * lower-case hexadecimal string.
 *
 * <p>The input is always encoded as UTF-8 before it is hashed, so the result
 * does not depend on the platform default charset.
 */
public class HashCalculation {

    /** Encoding applied to the hash input before it is digested. */
    private static final Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /** Lookup table used to render one nibble as a lower-case hex digit. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Computes the MD5 digest of the given text.
     *
     * @param content text to hash, must not be {@code null}
     * @return 32 lower-case hex characters
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String getMD5Hash(String content) {
        return digest("MD5", content);
    }

    /**
     * Computes the SHA-1 digest of the given text.
     *
     * @param content text to hash, must not be {@code null}
     * @return 40 lower-case hex characters
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String getSHA1Hash(String content) {
        return digest("SHA1", content);
    }

    /**
     * Computes the SHA-256 digest of the given text.
     *
     * @param content text to hash, must not be {@code null}
     * @return 64 lower-case hex characters
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public static String getSHA256Hash(String content) {
        return digest("SHA-256", content);
    }

    /**
     * Resolves the named algorithm and hashes the content with it.
     *
     * @throws RuntimeException if the JVM has no provider for the algorithm;
     *         the original {@link NoSuchAlgorithmException} is kept as cause
     */
    private static String digest(String digestAlgorithm, String content) {
        Objects.requireNonNull(digestAlgorithm, "digestAlgorithm must not be null");
        Objects.requireNonNull(content, "content must not be null");

        final MessageDigest messageDigest;
        try {
            // A fresh instance per call: MessageDigest is stateful and not thread-safe.
            messageDigest = MessageDigest.getInstance(digestAlgorithm);
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException("Digest Algorithm " + digestAlgorithm + " could not be found in this environment.", e);
        }

        return digest(messageDigest, content);
    }

    /** Hashes the UTF-8 bytes of the content and renders the digest as hex. */
    private static String digest(MessageDigest messageDigest, String content) {
        final byte[] result = messageDigest.digest(content.getBytes(UTF_8));
        final char[] hex = new char[result.length * 2];

        for (int i = 0; i < result.length; i++) {
            final int unsigned = result[i] & 0xff;
            hex[2 * i] = HEX_DIGITS[unsigned >>> 4];
            hex[2 * i + 1] = HEX_DIGITS[unsigned & 0x0f];
        }

        return new String(hex);
    }

}
IllegalArgumentException("config variable cannot be null"); + + this.config = config; + + } + + /** + * Reset underlying string builder to add new hash objects + */ + public void reset(){ + firstCall=true; + allNull=true; + sb.setLength(0); + } + + /** + * Calculates hash value based on added objects + * @param context MD5 / SHA1 / SHA-256 + * @return + * @throws IllegalArgumentException + */ + public String calculateHash(String context) throws IllegalArgumentException { + + if(sb.toString().isEmpty() || allNull){ + if(config.isModifyHashOutput()) + return config.getHashOutputIfBaseIsNull(); + } + + if(!"MD5".equalsIgnoreCase(context) && !"SHA1".equalsIgnoreCase(context) && !"SHA-256".equalsIgnoreCase(context)) + throw new IllegalArgumentException("context has to be MD5, SHA1 or SHA-256"); + + if("MD5".equalsIgnoreCase(context)) + return HashCalculation.getMD5Hash(this.getNormalizedString()); + + if("SHA1".equalsIgnoreCase(context)) + return HashCalculation.getSHA1Hash(this.getNormalizedString()); + + if("SHA-256".equalsIgnoreCase(context)) + return HashCalculation.getSHA256Hash(this.getNormalizedString()); + + return null; + + } + + /** + * Add new objects which will be concatenate with the defined delimiter + * @param object + * @param itemConfig + */ + public void add(Object object, NormalizeObjectConfig itemConfig){ + + if (firstCall){ + sb.append(normalize(object, itemConfig)); + firstCall = false; + }else{ + sb.append(config.getDelimter()); + sb.append(normalize(object, itemConfig)); + } + + } + + /** + * Returns the normalized string, based on added objects + * @return + */ + public String getNormalizedString(){ + + String normalizedString = sb.toString(); + + if(config.isCutOffEmptyTrailingObjects()){ + + boolean endsWithDelimter = normalizedString.endsWith(config.getDelimter()); + boolean endsWithEmptyQuotation = false; + + String emptyTrailingQuotation = config.getQuotationCharacter() + config.getQuotationCharacter(); + + if(config.isQuotingEnabled()){ + 
endsWithEmptyQuotation = normalizedString.endsWith(emptyTrailingQuotation); + } + + while(endsWithDelimter || endsWithEmptyQuotation){ + + if(endsWithDelimter) + normalizedString = normalizedString.substring(0, normalizedString.length() - config.getDelimter().length()); + + if(endsWithEmptyQuotation) + normalizedString = normalizedString.substring(0, normalizedString.length() - emptyTrailingQuotation.length()); + + endsWithDelimter = normalizedString.endsWith(config.getDelimter()); + + if(config.isQuotingEnabled()){ + endsWithEmptyQuotation = normalizedString.endsWith(emptyTrailingQuotation); + } + + } + + return normalizedString; + + }else{ + return sb.toString(); + } + + } + + + /** + * Returns normalized string of this object + * @param object + * @param itemConfig + * @return + */ + private String normalize(Object object, NormalizeObjectConfig itemConfig) { + if (object == null) + return config.getNullReplacement(); + + allNull = false; + + if (object instanceof String) + return normalize((String) object, itemConfig); + + if (object instanceof Character) + return normalize((Character) object, itemConfig); + + if (object instanceof Integer) + return normalize((Integer) object); + + if (object instanceof Long) + return normalize((Long) object); + + if (object instanceof Short) + return normalize((Short) object); + + if (object instanceof Float) + return normalize((Float) object); + + if (object instanceof Double) + return normalize((Double) object); + + if (object instanceof BigDecimal) + return normalize((BigDecimal) object); + + if (object instanceof BigInteger) + return normalize((BigInteger) object); + + if (object instanceof Date) + return normalize((Date) object); + + if (object instanceof Boolean) + return normalize((Boolean) object); + +// if (object instanceof Byte) +// return normalize((Byte) object); + + throw new IllegalArgumentException("Unsupported data type: " + object.getClass()); + } + + + public String normalize(final Integer value) { + return 
normalizeNumber(value, config.getIntegerFormat()); + } + + public String normalize(final BigInteger value) { + return normalizeNumber(value, config.getIntegerFormat()); + } + + public String normalize(final String value, final NormalizeObjectConfig itemConfig) { + + String normalized = null; + + if (value == null) { + normalized = config.getNullReplacement(); + } else { + + normalized = value; + + if(itemConfig.isTrimming()){ + normalized = value.trim(); + } + + if (NormalizeObjectConfig.CaseSensitive.UPPER_CASE.equals(itemConfig.getCaseSensitive())){ + normalized = normalized.toUpperCase(); + } + + if (NormalizeObjectConfig.CaseSensitive.LOWER_CASE.equals(itemConfig.getCaseSensitive())){ + normalized = normalized.toLowerCase(); + } + + if(config.isQuotingEnabled()){ + String quoteChar = config.getQuotationCharacter(); + normalized = normalized.replace(quoteChar, quoteChar + quoteChar); + + normalized = quoteChar + normalized + quoteChar; + } + + } + + return normalized; + } + + public String normalize(final Character value, final NormalizeObjectConfig itemConfig) { + return normalize(String.valueOf(value), itemConfig); + } + + public String normalize(final Long value) { + return normalizeNumber(value, config.getIntegerFormat()); + } + + public String normalize(final Float value) { + return normalizeNumber(value, config.getFloatFormat()); + } + + public String normalize(final Double value) { + return normalizeNumber(value, config.getDoubleFormat()); + } + + public String normalize(final Short value) { + return normalizeNumber(value, config.getIntegerFormat()); + } + + public String normalize(final BigDecimal value) { + if (value == null) { + return config.getNullReplacement(); + } + + return value.toPlainString(); + } + + public String normalize(final Boolean value) { + if (value == null) { + return config.getNullReplacement(); + } + + return String.valueOf(value); + } + + public String normalize(final Date value) { + if (value == null) { + return 
/**
 * Settings that apply to every value of a hash input: delimiter, null
 * replacement, quoting, date rendering and the number formats.
 *
 * <p>The three {@link NumberFormat} instances are created without grouping and
 * with a minimum of zero fraction digits. They are kept in sync when the
 * fraction sizes or the locale are changed through the setters.
 */
public class NormalizeConfig {

    private NumberFormat floatFormat;
    private NumberFormat doubleFormat;
    private NumberFormat integerFormat;

    private String delimter;
    private String nullReplacement;
    private String quotationCharacter;
    private boolean quotingEnabled;
    private String dateFormat;
    private boolean dateInMillis;
    private Locale numberFormat;
    private int maxFractionFloat;
    private int maxFractionDouble;
    private boolean modifyHashOutput;
    private String hashOutputIfBaseIsNull;
    private boolean cutOffEmptyTrailingObjects;

    /**
     * @param delimter separator between values, must not be {@code null}
     * @param nullReplacement text used for {@code null} values, must not be {@code null}
     * @param quotingEnabled whether string values are surrounded by the quotation character
     * @param quoteCharacter quotation character, must not be {@code null}
     * @param dateFormat date pattern, must not be {@code null}
     * @param numberFormat ENGLISH, GERMAN or US (case-insensitive)
     * @param maxFractionFloat maximum fraction digits of the float format
     * @param maxFractionDouble maximum fraction digits of the double format
     * @param modifyHashOutput whether an all-null input yields {@code hashOutputIfBaseIsNull}
     * @param hashOutputIfBaseIsNull replacement hash value, may be {@code null}
     * @param dateInMillis whether dates are rendered as epoch milliseconds
     * @param cutOffEmptyTrailingObjects whether empty trailing values are removed from the hash input
     * @throws IllegalArgumentException if a required argument is {@code null} or the number format is unknown
     */
    public NormalizeConfig(String delimter, String nullReplacement, boolean quotingEnabled, String quoteCharacter, String dateFormat, String numberFormat,
            int maxFractionFloat, int maxFractionDouble, boolean modifyHashOutput, String hashOutputIfBaseIsNull, boolean dateInMillis, boolean cutOffEmptyTrailingObjects) {

        requireArgument(delimter, "delimiter must not be null");
        requireArgument(nullReplacement, "nullReplacement must not be null. At least empty string.");
        requireArgument(quoteCharacter, "quoteCharacter must not be null");
        requireArgument(dateFormat, "dateFormat must not be null");
        requireArgument(numberFormat, "numberFormat must not be null");

        this.delimter = delimter;
        this.nullReplacement = nullReplacement;
        this.quotationCharacter = quoteCharacter;
        this.quotingEnabled = quotingEnabled;
        this.dateFormat = dateFormat;
        this.maxFractionFloat = maxFractionFloat;
        this.maxFractionDouble = maxFractionDouble;
        this.modifyHashOutput = modifyHashOutput;
        this.hashOutputIfBaseIsNull = hashOutputIfBaseIsNull;
        this.dateInMillis = dateInMillis;
        this.cutOffEmptyTrailingObjects = cutOffEmptyTrailingObjects;
        this.numberFormat = parseLocale(numberFormat);

        rebuildFormats();
    }

    /** Throws the constructor's standard exception when a required argument is missing. */
    private static void requireArgument(Object value, String message) {
        if (value == null) {
            throw new IllegalArgumentException(message);
        }
    }

    /** Maps the configured format name to its locale. */
    private static Locale parseLocale(String numberFormat) {
        if ("ENGLISH".equalsIgnoreCase(numberFormat)) {
            return Locale.ENGLISH;
        }
        if ("GERMAN".equalsIgnoreCase(numberFormat)) {
            return Locale.GERMAN;
        }
        if ("US".equalsIgnoreCase(numberFormat)) {
            return Locale.US;
        }
        throw new IllegalArgumentException("Unsupport number format: " + numberFormat);
    }

    /** Creates a number format without grouping and with zero minimum fraction digits. */
    private static NumberFormat newPlainFormat(Locale locale, int maxFractionDigits) {
        final NumberFormat format = NumberFormat.getNumberInstance(locale);
        format.setGroupingUsed(false);
        format.setMinimumFractionDigits(0);
        format.setMaximumFractionDigits(maxFractionDigits);
        return format;
    }

    /** Recreates all three formats from the current locale and fraction sizes. */
    private void rebuildFormats() {
        this.floatFormat = newPlainFormat(numberFormat, maxFractionFloat);
        this.doubleFormat = newPlainFormat(numberFormat, maxFractionDouble);
        this.integerFormat = newPlainFormat(numberFormat, 0);
    }

    public NumberFormat getFloatFormat() {
        return floatFormat;
    }

    public NumberFormat getDoubleFormat() {
        return doubleFormat;
    }

    public NumberFormat getIntegerFormat() {
        return integerFormat;
    }

    public String getDelimter() {
        return delimter;
    }

    public String getNullReplacement() {
        return nullReplacement;
    }

    public String getQuotationCharacter() {
        return quotationCharacter;
    }

    public boolean isQuotingEnabled() {
        return quotingEnabled;
    }

    public String getDateFormat() {
        return dateFormat;
    }

    public Locale getNumberFormat() {
        return numberFormat;
    }

    public int getMaxFractionFloat() {
        return maxFractionFloat;
    }

    public int getMaxFractionDouble() {
        return maxFractionDouble;
    }

    public void setFloatFormat(NumberFormat floatFormat) {
        this.floatFormat = floatFormat;
    }

    public void setDoubleFormat(NumberFormat doubleFormat) {
        this.doubleFormat = doubleFormat;
    }

    public void setIntegerFormat(NumberFormat integerFormat) {
        this.integerFormat = integerFormat;
    }

    public void setDelimter(String delimter) {
        this.delimter = delimter;
    }

    public void setNullReplacement(String nullReplacement) {
        this.nullReplacement = nullReplacement;
    }

    public void setQuotationCharacter(String quotationCharacter) {
        this.quotationCharacter = quotationCharacter;
    }

    public void setQuotingEnabled(boolean quotingEnabled) {
        this.quotingEnabled = quotingEnabled;
    }

    public void setDateFormat(String dateFormat) {
        this.dateFormat = dateFormat;
    }

    /**
     * Changes the locale of the number formats and recreates the float, double
     * and integer formats so the new locale actually takes effect. Formats
     * installed through the format setters are replaced.
     *
     * @throws IllegalArgumentException if {@code numberFormat} is {@code null}
     */
    public void setNumberFormat(Locale numberFormat) {
        requireArgument(numberFormat, "numberFormat must not be null");
        this.numberFormat = numberFormat;
        rebuildFormats();
    }

    /** Changes the maximum fraction digits and applies them to the current float format. */
    public void setMaxFractionFloat(int maxFractionFloat) {
        this.maxFractionFloat = maxFractionFloat;
        if (floatFormat != null) {
            floatFormat.setMaximumFractionDigits(maxFractionFloat);
        }
    }

    /** Changes the maximum fraction digits and applies them to the current double format. */
    public void setMaxFractionDouble(int maxFractionDouble) {
        this.maxFractionDouble = maxFractionDouble;
        if (doubleFormat != null) {
            doubleFormat.setMaximumFractionDigits(maxFractionDouble);
        }
    }

    public boolean isModifyHashOutput() {
        return modifyHashOutput;
    }

    public void setModifyHashOutput(boolean modifyHashOutput) {
        this.modifyHashOutput = modifyHashOutput;
    }

    public String getHashOutputIfBaseIsNull() {
        return hashOutputIfBaseIsNull;
    }

    public void setHashOutputIfBaseIsNull(String hashOutputIfBaseIsNull) {
        this.hashOutputIfBaseIsNull = hashOutputIfBaseIsNull;
    }

    public boolean isDateInMillis() {
        return dateInMillis;
    }

    public void setDateInMillis(boolean dateInMillis) {
        this.dateInMillis = dateInMillis;
    }

    public boolean isCutOffEmptyTrailingObjects() {
        return cutOffEmptyTrailingObjects;
    }

    public void setCutOffEmptyTrailingObjects(boolean cutOffEmptyTrailingObejcts) {
        this.cutOffEmptyTrailingObjects = cutOffEmptyTrailingObejcts;
    }

}
/**
 * Per-value settings of the hash normalization: whether the value is trimmed
 * and how its letter case is handled.
 */
public class NormalizeObjectConfig {

    /** Case handling applied to a string value before it is hashed. */
    public enum CaseSensitive {
        CASE_SENSITIVE,
        UPPER_CASE,
        LOWER_CASE,
        NOT_IN_USE
    }

    private CaseSensitive caseSensitive;
    private boolean trimming;

    /**
     * @param caseSensitive name of a {@link CaseSensitive} constant (case-insensitive)
     * @param trimming whether the value is trimmed before hashing
     * @throws IllegalArgumentException if {@code caseSensitive} is {@code null} or names no constant
     */
    public NormalizeObjectConfig(String caseSensitive, boolean trimming) {

        if (caseSensitive == null) {
            throw new IllegalArgumentException("caseSensitive must not be null");
        }

        this.caseSensitive = parseCaseSensitive(caseSensitive);
        this.trimming = trimming;
    }

    /**
     * Resolves the constant whose name matches the given text, ignoring case.
     * Every name maps to its own constant; in particular NOT_IN_USE is no
     * longer silently turned into LOWER_CASE.
     */
    private static CaseSensitive parseCaseSensitive(String name) {
        for (CaseSensitive candidate : CaseSensitive.values()) {
            if (candidate.name().equalsIgnoreCase(name)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("caseSensitive has to be CASE_SENSITIVE, UPPER_CASE, LOWER_CASE or NOT_IN_USE");
    }

    public CaseSensitive getCaseSensitive() {
        return caseSensitive;
    }

    public boolean isTrimming() {
        return trimming;
    }

    public void setCaseSensitive(CaseSensitive caseSensitive) {
        this.caseSensitive = caseSensitive;
    }

    public void setTrimming(boolean trimming) {
        this.trimming = trimming;
    }

}
- * @author Jan Lolling - */ -public class HashBuilderTest { - - /** - * SHA-1 test - * @throws Exception - */ - @Test - public void testAllTypesMD5() throws Exception { - HashBuilder hb = HashBuilder.getMD5HashBuilder(); - hb.caseInsensitive(); - hb.reset(); - hb.add(1); - hb.add(100000l); - hb.add((short) 3); - hb.add(4.5f); - hb.add(12.3456d); - hb.add(new java.math.BigDecimal("1.23456789")); - hb.add(true); - Date testDate = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2016-08-18 07:55:33"); - hb.add(testDate); - hb.add("äöüß/\n\r\b"); - String actual = hb.build(); - System.out.println("Value test sha-1: " + actual); - String expected = "efafff817a2a56bad871f282fcbc19b1"; - assertEquals(expected, actual); - } - - /** - * SHA-1 test - * @throws Exception - */ - @Test - public void testAllTypesSHA1() throws Exception { - HashBuilder hb = HashBuilder.getSHA1HashBuilder(); - hb.caseInsensitive(); - hb.reset(); - hb.add(1); - hb.add(100000l); - hb.add((short) 3); - hb.add(4.5f); - hb.add(12.3456d); - hb.add(new java.math.BigDecimal("1.23456789")); - hb.add(true); - Date testDate = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2016-08-18 07:55:33"); - hb.add(testDate); - hb.add("äöüß/\n\r\b"); - String actual = hb.build(); - System.out.println("Values test sha-1: " + actual); - String expected = "1e9b3a956e4f8b0331314b2a4a12561cef1582db"; - assertEquals(expected, actual); - } - - /** - * SHA-256 test - * @throws Exception - */ - @Test - public void testAllTypesSHA256() throws Exception { - HashBuilder hb = HashBuilder.getSHA256HashBuilder(); - hb.caseInsensitive(); - hb.reset(); - hb.add(1); - hb.add(100000l); - hb.add((short) 3); - hb.add(4.5f); - hb.add(12.3456d); - hb.add(new java.math.BigDecimal("1.23456789")); - hb.add(true); - Date testDate = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse("2016-08-18 07:55:33"); - hb.add(testDate); - hb.add("äöüß/\n\r\b"); - String actual = hb.build(); - System.out.println("Value test 
sha-256: " + actual); - String expected = "9dca1f7c3efea4799b7178f4fad17f8079539e55e7307e36b182ee593c6b9eb2"; - assertEquals(expected, actual); - } - - /** - * SHA-256 test - * @throws Exception - */ - @Test - public void testAllTypesNull() throws Exception { - HashBuilder hb = HashBuilder.getSHA256HashBuilder(); - hb.add((String) null); - String actual = hb.build(); - System.out.println("Null test SHA-256: " + actual); - String expected = "74234e98afe7498fb5daf1f36ac2d78acc339464f950703b8c019892f982b90b"; - assertEquals(expected, actual); - } - -} diff --git a/src/test/java/de/cimt/talendcomp/checksum/TestMD5.java b/src/test/java/de/cimt/talendcomp/checksum/TestMD5.java new file mode 100644 index 0000000..00bd49a --- /dev/null +++ b/src/test/java/de/cimt/talendcomp/checksum/TestMD5.java @@ -0,0 +1,197 @@ +package de.cimt.talendcomp.checksum; + +import static org.junit.Assert.*; + +import java.util.Calendar; +import java.util.GregorianCalendar; + +import org.junit.Before; +import org.junit.Test; + + +public class TestMD5 { + + private HashNormalization md5Base; + private NormalizeObjectConfig itemConfig; + private NormalizeConfig config; + + @Before + public void setup() { + config = new NormalizeConfig(";", "", true, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 1, false, null, false, false); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + } + + @Test + public void testNumericValues() throws IllegalArgumentException { + String result; + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + + md5Base.reset(); + md5Base.add("test", itemConfig); + md5Base.add("123", itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_123_STR, result); + + md5Base.reset(); + md5Base.add("test", itemConfig); + md5Base.add(123, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_123, result); + + md5Base.reset(); + md5Base.add("test", 
itemConfig); + md5Base.add(123L, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_123, result); + + md5Base.reset(); + md5Base.add("test", itemConfig); + md5Base.add(123f, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_123, result); + + md5Base.reset(); + config.setDateInMillis(false); + Calendar calendar = GregorianCalendar.getInstance(); + calendar.clear(); + calendar.set(2017, 00, 01, 12, 5, 30); + md5Base.add("Test", itemConfig); + md5Base.add(calendar.getTime(), itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_DATE, result); + + md5Base.reset(); + config.setDateInMillis(true); + calendar = GregorianCalendar.getInstance(); + calendar.clear(); + calendar.set(2017, 00, 01, 12, 5, 30); + md5Base.add("Test", itemConfig); + md5Base.add(calendar.getTime(), itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_DATE_AS_MIILS, result); + } + + @Test + public void testLowerCaseString() throws IllegalArgumentException { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.LOWER_CASE); + + md5Base.reset(); + md5Base.add("test", itemConfig); + + String result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_LOWER_CASE, result); + } + + @Test + public void testUpperCaseString() throws IllegalArgumentException { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + + md5Base.reset(); + md5Base.add("test", itemConfig); + + String result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TEST_UPPER_CASE, result); + } + + @Test + public void testNull() throws IllegalArgumentException { + + md5Base.reset(); + md5Base.add(null, itemConfig); + + String result = md5Base.calculateHash("MD5"); + assertEquals(MD5_EMPTY, result); + } + + @Test + public void testNullNull() throws IllegalArgumentException { + + md5Base.reset(); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + + String result = 
md5Base.calculateHash("MD5"); + assertEquals(MD5_SEMICOLON, result); + } + + @Test + public void testEmptyString() throws IllegalArgumentException { + md5Base.reset(); + md5Base.add("", itemConfig); + + String result = md5Base.calculateHash("MD5"); + assertEquals(MD5_SINGLE_EMPTY_STRING, result); + } + + @Test + public void testTwoEmptyStrings() throws IllegalArgumentException { + + md5Base.reset(); + md5Base.add("", itemConfig); + md5Base.add("", itemConfig); + + String result = md5Base.calculateHash("MD5"); + assertEquals(MD5_TWO_EMPTY_STRINGS, result); + } + + /** + * The MD5 hash for the empty string (""). + */ + private static final String MD5_EMPTY = "d41d8cd98f00b204e9800998ecf8427e"; + + /** + * The MD5 hash for the string "TEST" (including quotation marks) + */ + private static final String MD5_TEST_UPPER_CASE = "4cf57dcd20acca8fe832775bdcdb4180"; + + /** + * The MD5 hash for the string "TEST" (including quotation marks) + */ + + private static final String MD5_TEST_LOWER_CASE = "303b5c8988601647873b4ffd247d83cb"; + /** + * The MD5 hash for the String "TEST";123. + */ + private static final String MD5_TEST_123 = "3bd106921f2a35c66c27e4b342de7b4e"; + + /** + * The MD5 hash for the String "TEST";"123". + */ + private static final String MD5_TEST_123_STR = "005c6c49b48561b6f2b4edd7fc9019b5"; + + /** + * The MD5 hash for the string ";" (without quotation marks). + */ + private static final String MD5_SEMICOLON = "9eecb7db59d16c80417c72d1e1f4fbf1"; + + /** + * The MD5 hash for the string "" (including quotation marks). + */ + private static final String MD5_SINGLE_EMPTY_STRING = "9d4568c009d203ab10e33ea9953a0264"; + + /** + * The MD5 hash for the string "";"" (including quotation marks). + */ + private static final String MD5_TWO_EMPTY_STRINGS = "9a3f6d9b3e70fbe3a0934365d3048b04"; + + /** + * The MD5 hash for the string "TEST";2017-01-01T12:05:30.000 (including quotation marks). 
+ */ + private static final String MD5_TEST_DATE = "9b7bcdfa69aa3b0227949d881effa86c"; + + /** + * The MD5 hash for the string "TEST";1483268730000 (including quotation marks). + */ + private static final String MD5_TEST_DATE_AS_MIILS = "04c562d29e75ff3d0461cfd44da11c0a"; + +} diff --git a/src/test/java/de/cimt/talendcomp/checksum/TestNormalization.java b/src/test/java/de/cimt/talendcomp/checksum/TestNormalization.java new file mode 100644 index 0000000..238931f --- /dev/null +++ b/src/test/java/de/cimt/talendcomp/checksum/TestNormalization.java @@ -0,0 +1,425 @@ +package de.cimt.talendcomp.checksum; + +import static org.junit.Assert.assertEquals; + +import java.math.BigDecimal; +import java.util.Calendar; +import java.util.GregorianCalendar; +import org.junit.Before; +import org.junit.Test; + +public class TestNormalization { + + private HashNormalization md5Base; + private NormalizeObjectConfig itemConfig; + private NormalizeConfig config; + + @Before + public void setup() { + config = new NormalizeConfig(";", "", true, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, false, null, false, false); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + } + + @Test + public void testLowerCaseString() throws Exception { + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.LOWER_CASE); + String result = md5Base.normalize("Test", itemConfig); + assertEquals("\"test\"", result); + } + + @Test + public void testUpperCaseString() throws Exception { + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + String result = md5Base.normalize("Test", itemConfig); + assertEquals("\"TEST\"", result); + } + + @Test + public void testCaseSensitiveString() throws Exception { + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.CASE_SENSITIVE); + String result = md5Base.normalize("TeSt", itemConfig); + assertEquals("\"TeSt\"", result); + } + + @Test + public void 
testSpecialCharacters() throws Exception { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + assertEquals("\"STRASSE\"", md5Base.normalize("Straße",itemConfig)); + assertEquals("\"MÜSSIGKEIT\"", md5Base.normalize("Müßigkeit",itemConfig)); + assertEquals("\"TEST!?&%$§#+*~{}[]()/\\<>|^°\"", md5Base.normalize("TEST!?&%$§#+*~{}[]()/\\<>|^°",itemConfig)); + } + + @Test + public void testTrimming() throws Exception { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + + // left trimming + assertEquals("\"TEST\"", md5Base.normalize(" test", itemConfig)); + assertEquals("\"TEST\"", md5Base.normalize(" test", itemConfig)); + assertEquals("\"TEST\"", md5Base.normalize("\ttest", itemConfig)); + + // right trimming + assertEquals("\"TEST\"", md5Base.normalize("test ", itemConfig)); + assertEquals("\"TEST\"", md5Base.normalize("test ", itemConfig)); + assertEquals("\"TEST\"", md5Base.normalize("test\t", itemConfig)); + + } + + @Test + public void testQuoting() throws Exception { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + assertEquals("\"ABC\"", md5Base.normalize("abc", itemConfig)); + assertEquals("\"COLUMN;ONE\"", md5Base.normalize("column;one", itemConfig)); + assertEquals("\"COLUMN\"\"TWO\"", md5Base.normalize("column\"two", itemConfig)); + + config.setQuotationCharacter("#"); + assertEquals("#ABC#", md5Base.normalize("abc", itemConfig)); + assertEquals("#COLUMN;ONE#", md5Base.normalize("column;one", itemConfig)); + assertEquals("#COLUMN\"TWO#", md5Base.normalize("column\"two", itemConfig)); + + config.setQuotingEnabled(false); + assertEquals("ABC", md5Base.normalize("abc", itemConfig)); + assertEquals("COLUMN;ONE", md5Base.normalize("column;one", itemConfig)); + assertEquals("COLUMN\"TWO", md5Base.normalize("column\"two", itemConfig)); + + } + + @Test + public void 
testChar() throws Exception { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + assertEquals("\"A\"", md5Base.normalize('A', itemConfig)); + assertEquals("\"A\"", md5Base.normalize('a', itemConfig)); + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.LOWER_CASE); + assertEquals("\"a\"", md5Base.normalize('A', itemConfig)); + } + + @Test + public void testNumericValues() throws IllegalArgumentException { + + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + // default normalization + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("123", itemConfig); + assertEquals("\"TEST\";\"123\"", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123, itemConfig); + assertEquals("\"TEST\";123", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123L, itemConfig); + assertEquals("\"TEST\";123", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123f, itemConfig); + assertEquals("\"TEST\";123", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123.5f, itemConfig); + assertEquals("\"TEST\";123.5", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123d, itemConfig); + assertEquals("\"TEST\";123", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(123.5d, itemConfig); + assertEquals("\"TEST\";123.5", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(1.0/9, itemConfig); + assertEquals("\"TEST\";0.111111111111111", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(2.0/9, itemConfig); + 
assertEquals("\"TEST\";0.222222222222222", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(3.0/9, itemConfig); + assertEquals("\"TEST\";0.333333333333333", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(4.0/9, itemConfig); + assertEquals("\"TEST\";0.444444444444444", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(5.0/9, itemConfig); + assertEquals("\"TEST\";0.555555555555556", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(6.0/9, itemConfig); + assertEquals("\"TEST\";0.666666666666667", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(9.0/9, itemConfig); + assertEquals("\"TEST\";1", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(13d/3, itemConfig); + assertEquals("\"TEST\";4.333333333333333", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(13f/3, itemConfig); + assertEquals("\"TEST\";4.3333335", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(5555432100L, itemConfig); + assertEquals("\"TEST\";5555432100", md5Base.getNormalizedString()); + + } + + @Test + public void testShort() { + assertEquals("0", md5Base.normalize((short) 0)); + assertEquals("127", md5Base.normalize((short) 127)); + assertEquals("255", md5Base.normalize((short) 255)); + assertEquals("-32768", md5Base.normalize(Short.MIN_VALUE)); + assertEquals("32767", md5Base.normalize(Short.MAX_VALUE)); + } + + @Test + public void testLargeAndSmallBigDecimals() { + BigDecimal b = new BigDecimal("-47.11e16"); + assertEquals("-471100000000000000", md5Base.normalize(b)); + assertEquals("-471100000000000000", md5Base.normalize(b)); + + BigDecimal b2 = new BigDecimal("-47.11e-16"); + 
assertEquals("-0.000000000000004711", md5Base.normalize(b2)); + assertEquals("-0.000000000000004711", md5Base.normalize(b2)); + + BigDecimal b3 = new BigDecimal("15.15"); + assertEquals("15.15", md5Base.normalize(b3)); + assertEquals("15.15", md5Base.normalize(b3)); + + BigDecimal b4 = new BigDecimal("15.1500000000000000000000"); + assertEquals("15.1500000000000000000000", md5Base.normalize(b4)); + assertEquals("15.1500000000000000000000", md5Base.normalize(b4)); + } + + @Test + public void testBool() throws IllegalArgumentException { + assertEquals("true", md5Base.normalize(true)); + } + + @Test + public void testNull() throws IllegalArgumentException { + String s = null; + Integer i = null; + Float f = null; + Double d = null; + + config.setNullReplacement(""); + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + assertEquals("", md5Base.normalize(s, itemConfig)); + assertEquals("", md5Base.normalize(i)); + assertEquals("", md5Base.normalize(f)); + assertEquals("", md5Base.normalize(d)); + + config.setNullReplacement("null"); + assertEquals("null", md5Base.normalize(s,itemConfig)); + assertEquals("null", md5Base.normalize(i)); + assertEquals("null", md5Base.normalize(f)); + assertEquals("null", md5Base.normalize(d)); + } + + @Test + public void testNullNull() throws IllegalArgumentException { + + config.setNullReplacement(""); + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.UPPER_CASE); + itemConfig.setTrimming(true); + + md5Base.reset(); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + + assertEquals(";", md5Base.getNormalizedString()); + } + + @Test + public void testCaseSensitiveNormalization() throws IllegalArgumentException { + + config.setNullReplacement(""); + itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.CASE_SENSITIVE); + itemConfig.setTrimming(true); + + md5Base.reset(); + md5Base.add(123, itemConfig); + md5Base.add("Test", itemConfig); + 
md5Base.add("Lorem ipsum dolor sit amet", itemConfig); + + assertEquals("123;\"Test\";\"Lorem ipsum dolor sit amet\"", md5Base.getNormalizedString()); + } + + @Test + public void testDate() { + + config.setDateInMillis(false); + + Calendar calendar = GregorianCalendar.getInstance(); + // Clear all fields, especially milliseconds + calendar.clear(); + + // 2017-01-01T12:05:30 (month in calendar is zero-based) + calendar.set(2017, 00, 01, 12, 5, 30); + assertEquals("2017-01-01T12:05:30.000", md5Base.normalize(calendar.getTime())); + + calendar.clear(); + calendar.set(2017, 05, 18, 13, 42, 05); + assertEquals("2017-06-18T13:42:05.000", md5Base.normalize(calendar.getTime())); + } + + @Test + public void testDateInMillis(){ + + config.setDateInMillis(true); + + Calendar calendar = GregorianCalendar.getInstance(); + // Clear all fields, especially milliseconds + calendar.clear(); + + // 2017-01-01T12:05:30 (month in calendar is zero-based) + calendar.set(2017, 00, 01, 12, 00, 00); + assertEquals("1483268400000", md5Base.normalize(calendar.getTime())); + + } + + + @Test + public void testCutOffEmptyTrailingWithQuotation(){ + + config.setCutOffEmptyTrailingObjects(true); + config.setQuotingEnabled(true); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + assertEquals("\"TEST\"", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + md5Base.add("", itemConfig); + assertEquals("\"TEST\"", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(null, itemConfig); + md5Base.add("", itemConfig); + assertEquals("\"TEST\"", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + md5Base.add(null, itemConfig); + assertEquals("\"TEST\"", md5Base.getNormalizedString()); + + config.setCutOffEmptyTrailingObjects(false); + md5Base.reset(); + md5Base.add("Test", itemConfig); 
+ md5Base.add("", itemConfig); + md5Base.add("", itemConfig); + assertEquals("\"TEST\";\"\";\"\"", md5Base.getNormalizedString()); + } + + @Test + public void testCutOffEmptyTrailingWithOutQuotation(){ + + config.setCutOffEmptyTrailingObjects(true); + config.setQuotingEnabled(false); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + assertEquals("TEST", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + md5Base.add("", itemConfig); + assertEquals("TEST", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add(null, itemConfig); + md5Base.add("", itemConfig); + assertEquals("TEST", md5Base.getNormalizedString()); + + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + md5Base.add(null, itemConfig); + assertEquals("TEST", md5Base.getNormalizedString()); + + config.setCutOffEmptyTrailingObjects(false); + md5Base.reset(); + md5Base.add("Test", itemConfig); + md5Base.add("", itemConfig); + md5Base.add("", itemConfig); + + assertEquals("TEST;;", md5Base.getNormalizedString()); + } + +// @Ignore +// @Test +// public void testMassiveUse() { +// +// itemConfig.setCaseSensitive(NormalizeObjectConfig.CaseSensitive.CASE_SENSITIVE); +// itemConfig.setTrimming(true); +// +// Calendar calendar = GregorianCalendar.getInstance(); +// // Clear all fields, especially milliseconds +// calendar.clear(); +// +// // 2017-01-01T12:05:30 (month in calendar is zero-based) +// calendar.set(2017, 00, 01, 12, 5, 30); +// +// for (int i = 0; i < 100000; i++) { +// +// md5Base.reset(); +// md5Base.add("Test",itemConfig); +// md5Base.add(i,itemConfig); +// md5Base.add(calendar.getTime(),itemConfig); +// md5Base.add("Lorem ipsum dolor sit amet",itemConfig); +// +// assertEquals("\"Test\";" + i + ";2017-01-01T12:05:30.000;\"Lorem ipsum dolor sit amet\"",md5Base.getNormalizedString()); +// } +// +// } + +} diff 
--git a/src/test/java/de/cimt/talendcomp/checksum/TestSpecialBehaviour.java b/src/test/java/de/cimt/talendcomp/checksum/TestSpecialBehaviour.java new file mode 100644 index 0000000..216e928 --- /dev/null +++ b/src/test/java/de/cimt/talendcomp/checksum/TestSpecialBehaviour.java @@ -0,0 +1,125 @@ +package de.cimt.talendcomp.checksum; + +import static org.junit.Assert.*; + +import org.junit.Test; +import static org.hamcrest.CoreMatchers.*; + +public class TestSpecialBehaviour { + + private HashNormalization md5Base; + private NormalizeObjectConfig itemConfig; + private NormalizeConfig config; + + /** + * Test null with modified output + */ + @Test + public void testModifyOutput1(){ + + config = new NormalizeConfig(";", "", true, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, true, HASH_OUTPUT_REPLACEMENT,false, false); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + + String result; + + md5Base.reset(); + md5Base.add(null, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(HASH_OUTPUT_REPLACEMENT, result); + + } + + /** + * Test "" with modified output and quotation disabled + */ + @Test + public void testModifyOutput2(){ + + config = new NormalizeConfig(";", "", false, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, true, HASH_OUTPUT_REPLACEMENT, false, false); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + + String result; + + md5Base.reset(); + md5Base.add("", itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(HASH_OUTPUT_REPLACEMENT, result); + + } + + /** + * Test "" with modified output and quotation disabled + */ + @Test + public void testModifyOutputAllNull(){ + + config = new NormalizeConfig(";", "", false, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, true, HASH_OUTPUT_REPLACEMENT, false, false); + md5Base = new HashNormalization(config); + itemConfig = new 
NormalizeObjectConfig("UPPER_CASE", true); + + String result; + + md5Base.reset(); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(HASH_OUTPUT_REPLACEMENT, result); + + } + + /** + * Test "" with modified output and quotation disabled + */ + @Test + public void testModifyOutputAllNullWithCutOf(){ + + config = new NormalizeConfig(";", "", false, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, true, HASH_OUTPUT_REPLACEMENT, false, true); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + + String result; + + md5Base.reset(); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + md5Base.add(null, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertEquals(HASH_OUTPUT_REPLACEMENT, result); + + } + + /** + * Test "" with modified output and quotation disabled + */ + @Test + public void testModifyOutputNotAllNull(){ + + config = new NormalizeConfig(";", "", false, "\"", "yyyy-MM-dd'T'HH:mm:ss.SSS", "ENGLISH", 7, 15, true, HASH_OUTPUT_REPLACEMENT, false, false); + md5Base = new HashNormalization(config); + itemConfig = new NormalizeObjectConfig("UPPER_CASE", true); + + String result; + + md5Base.reset(); + md5Base.add(null, itemConfig); + md5Base.add("Test", itemConfig); + md5Base.add(null, itemConfig); + + result = md5Base.calculateHash("MD5"); + assertThat(result, is(not(HASH_OUTPUT_REPLACEMENT))); + assertEquals("6e618343fec0d14c498ecd6b1a2c5c41",result); + } + + /** + * Hashoutput replacement + */ + private static final String HASH_OUTPUT_REPLACEMENT = "###"; + +} diff --git a/talend_component/tHashRow/jlo-talendcomp-hash-1.2.jar b/talend_component/tHashRow/jlo-talendcomp-hash-1.2.jar deleted file mode 100644 index c1eea39..0000000 Binary files a/talend_component/tHashRow/jlo-talendcomp-hash-1.2.jar and /dev/null differ diff --git 
a/talend_component/tHashRow/tHashRow_begin.javajet b/talend_component/tHashRow/tHashRow_begin.javajet index ba8cfbf..7f26622 100644 --- a/talend_component/tHashRow/tHashRow_begin.javajet +++ b/talend_component/tHashRow/tHashRow_begin.javajet @@ -9,24 +9,27 @@ CodeGeneratorArgument codeGenArgument = (CodeGeneratorArgument) argument; INode node = (INode) codeGenArgument.getArgument(); String cid = node.getUniqueName(); - String hashType = ElementParameterParser.getValue(node, "__HASH_TYPE__"); - boolean caseInsensitive = "true".equals(ElementParameterParser.getValue(node, "__CASE_INSENSITIVE__")); + String delimiter = ElementParameterParser.getValue(node, "__DELIMITER__"); String nullReplacement = ElementParameterParser.getValue(node, "__NULL_REPLACEMENT__"); if (nullReplacement == null || nullReplacement.trim().isEmpty()) { - nullReplacement = "null"; + nullReplacement = ""; } + String fractionSizeFloat = ElementParameterParser.getValue(node, "__FRACTION_SIZE_FLOAT__"); + String fractionSizeDouble = ElementParameterParser.getValue(node, "__FRACTION_SIZE_DOUBLE__"); + String numberFormat = ElementParameterParser.getValue(node, "__NUMBER_FORMAT__"); + String dateFormat = ElementParameterParser.getValue(node, "__DATE_FORMAT__"); + boolean enableQuoting = ("true").equals(ElementParameterParser.getValue(node,"__ENABLE_STRING_QUOTING__")); + String quoteCharacter = ElementParameterParser.getValue(node, "__QUOTE_CHARACTER__"); + boolean modifyHashOutput = ("true").equals(ElementParameterParser.getValue(node,"__MODIFY_HASH_OUTPUT_IF_NULL__")); + String hashOutputIfBaseIsNull = ElementParameterParser.getValue(node, "__HASH_VALUE_IF_NULL__"); + if (hashOutputIfBaseIsNull == null || hashOutputIfBaseIsNull.trim().isEmpty()) { + hashOutputIfBaseIsNull = ""; + } + boolean dateInMillis = ("true").equals(ElementParameterParser.getValue(node,"__ENABLE_DATE_AS_MILLIS__")); + boolean cutOffEmptyTrailingObjects = 
("true").equals(ElementParameterParser.getValue(node,"__CUT_OFF_TRAILING_OBJECTS__")); %> -<% if ("MD5".equals(hashType)) { %> - de.cimt.talendcomp.checksum.HashBuilder <%=cid%> = de.cimt.talendcomp.checksum.HashBuilder.getMD5HashBuilder(); -<% } else if ("SHA1".equals(hashType)) { %> - de.cimt.talendcomp.checksum.HashBuilder <%=cid%> = de.cimt.talendcomp.checksum.HashBuilder.getSHA1HashBuilder(); -<% } else if ("SHA256".equals(hashType)) { %> - de.cimt.talendcomp.checksum.HashBuilder <%=cid%> = de.cimt.talendcomp.checksum.HashBuilder.getSHA256HashBuilder(); -<% } %> -<% if (caseInsensitive) { %> - <%=cid%>.caseInsensitive(); - // if the null replacement is null, internally the String "null" will be used - <%=cid%>.setNullReplacement(<%=nullReplacement%>); -<% } %> - globalMap.put("<%=cid%>", <%=cid%>); + + de.cimt.talendcomp.checksum.NormalizeConfig <%=cid%>_config = new de.cimt.talendcomp.checksum.NormalizeConfig(<%=delimiter%>, <%=nullReplacement%>, <%=enableQuoting%>, <%=quoteCharacter%>, <%=dateFormat%>, "<%=numberFormat%>", <%=fractionSizeFloat%>, <%=fractionSizeDouble%>, <%=modifyHashOutput%>, <%=hashOutputIfBaseIsNull%>, <%=dateInMillis%>, <%=cutOffEmptyTrailingObjects%>); + de.cimt.talendcomp.checksum.HashNormalization <%=cid%>_hn = new de.cimt.talendcomp.checksum.HashNormalization(<%=cid%>_config); + globalMap.put("<%=cid%>_config", <%=cid%>_config); int <%=cid%>_nbLines = 0; \ No newline at end of file diff --git a/talend_component/tHashRow/tHashRow_java.xml b/talend_component/tHashRow/tHashRow_java.xml index 97179ea..171534c 100644 --- a/talend_component/tHashRow/tHashRow_java.xml +++ b/talend_component/tHashRow/tHashRow_java.xml @@ -1,5 +1,4 @@ -
@@ -22,31 +21,74 @@ - + - + - + + + + + + + + + + - - - "null" + + ";" + + + "" + + 7 + + + 15 + + + + + + + + + + TRUE + + + "yyyy-MM-dd'T'HH:mm:ss.SSS" + + + + "\"" + + + FALSE + - - Release: 1.2 build at: 20170407 + + + "22222222222222222222222222222222" + + + + + Release: 1.4 build at: 20171024 - + diff --git a/talend_component/tHashRow/tHashRow_main.javajet b/talend_component/tHashRow/tHashRow_main.javajet index 082752e..de630bf 100644 --- a/talend_component/tHashRow/tHashRow_main.javajet +++ b/talend_component/tHashRow/tHashRow_main.javajet @@ -17,7 +17,11 @@ CodeGeneratorArgument codeGenArgument = (CodeGeneratorArgument) argument; INode node = (INode) codeGenArgument.getArgument(); String cid = node.getUniqueName(); + String hashType = ElementParameterParser.getValue(node, "__HASH_TYPE__"); String outputColumn = ElementParameterParser.getValue(node, "__OUTPUT_COLUMN__"); + boolean exposeHashBase = ("true").equals(ElementParameterParser.getValue(node,"__EXPOSE_HASH_BASE__")); + String hashBaseOutputColumn = ElementParameterParser.getValue(node, "__HASH_BASE_OUTPUT_COLUMN__"); + IConnection connIn = null; Map inColumns = new HashMap(); // to check the out going columns for hand over the data List inConns = NodeUtil.getIncomingConnections(node, IConnectionCategory.DATA); @@ -34,45 +38,68 @@ connOut = outConns.get(0); } %> - <%=cid%>.reset(); + + <%=cid%>_hn.reset(); <%=cid%>_nbLines++; globalMap.put("<%=cid%>_NB_LINE", <%=cid%>_nbLines); <% @SuppressWarnings("unchecked") List> columnList = (List>) ElementParameterParser.getObjectValue(node, "__COLUMN_CONFIG__"); if (connIn != null && columnList != null) { + + boolean use = false; + boolean itemTrimming = false; + String itemCaseSensetive = ""; + for (Map ce : columnList) { String columnName = ce.get("SCHEMA_COLUMN"); + + + use = "true".equalsIgnoreCase(ce.get("USE")); + itemTrimming = "true".equals(ce.get("TRIM")); + itemCaseSensetive = ce.get("CASE_SENSITIVE"); + if (columnName.equals(outputColumn)) { 
continue; } + IMetadataColumn column = inColumns.get(columnName); if (column == null) { continue; } - boolean use = "true".equals(ce.get("USE")); - if (use) { %> - try { - <%=cid%>.add(<%=connIn.getName()%>.<%=columnName%>); - } catch (Exception e) { - globalMap.put("<%=cid%>_ERROR_MESSAGE","Convert to String and add column <%=columnName%> to checksum failed:" + e.getMessage()); - throw e; - } -<% } + + if (use) { +%> + de.cimt.talendcomp.checksum.NormalizeObjectConfig <%=cid%>_itemConfig_<%=columnName%> = new de.cimt.talendcomp.checksum.NormalizeObjectConfig("<%=itemCaseSensetive%>", <%=itemTrimming%>); + try { + <%=cid%>_hn.add(<%=connIn.getName()%>.<%=columnName%>, <%=cid%>_itemConfig_<%=columnName%>); + } catch (Exception e) { + globalMap.put("<%=cid%>_ERROR_MESSAGE","Convert to String and add column <%=columnName%> to checksum failed:" + e.getMessage()); + throw e; + } + +<% } // END if(use) + if (connOut != null) { %> - <%=connOut.getName()%>.<%=columnName%> = <%=connIn.getName()%>.<%=columnName%>; + <%=connOut.getName()%>.<%=columnName%> = <%=connIn.getName()%>.<%=columnName%>; <% } - } // for + + } // END for } %> try { String hash = null; - if (<%=cid%>.allValuesAreNull() == false) { - hash = <%=cid%>.build(); - } - globalMap.put("<%=cid%>_HASH", hash); -<% if (connOut != null && outputColumn != null && outputColumn.isEmpty() == false) { %> - <%=connOut.getName()%>.<%=outputColumn%> = hash; -<% } %> + hash = <%=cid%>_hn.calculateHash("<%=hashType%>"); + +<% if (connOut != null && outputColumn != null && outputColumn.isEmpty() == false) { %> + <%=connOut.getName()%>.<%=outputColumn%> = hash; + globalMap.put("<%=cid%>_HASH", hash); +<% } %> + + +<% if (exposeHashBase){ %> + <%=connOut.getName()%>.<%=hashBaseOutputColumn%> = <%=cid%>_hn.getNormalizedString(); +<% } %> + } catch (Exception e) { globalMap.put("<%=cid%>_ERROR_MESSAGE","Build hash failed:" + e.getMessage()); throw e; diff --git a/talend_component/tHashRow/tHashRow_messages.properties 
b/talend_component/tHashRow/tHashRow_messages.properties index 6e47467..20e7722 100644 --- a/talend_component/tHashRow/tHashRow_messages.properties +++ b/talend_component/tHashRow/tHashRow_messages.properties @@ -8,12 +8,38 @@ HASH_TYPE.ITEM.SHA1=SHA-1 HASH_TYPE.ITEM.SHA256=SHA-256 OUTPUT_COLUMN.NAME=Hash output column (must be String typed) +MODIFY_HASH_OUTPUT_IF_NULL.NAME=Modify hash output if all hash base values are empty +HASH_VALUE_IF_NULL.NAME=Modified hash output +DELIMITER.NAME=Delimiter COLUMN_CONFIG.NAME=Relevant fields COLUMN_CONFIG.ITEM.USE=Use +COLUMN_CONFIG.ITEM.TRIM=Trim +COLUMN_CONFIG.ITEM.CASE_SENSITIVE=Case Sensitive +COLUMN_CONFIG.ITEM.CASE_SENSITIVE.ITEM.CASE_SENSITIVE=Case Sensitive +COLUMN_CONFIG.ITEM.CASE_SENSITIVE.ITEM.UPPER_CASE=Upper Case +COLUMN_CONFIG.ITEM.CASE_SENSITIVE.ITEM.LOWER_CASE=Lower Case +COLUMN_CONFIG.ITEM.CASE_SENSITIVE.ITEM.NOT_IN_USE=Not In Use -CASE_INSENSITIVE.NAME=Strings are used case insensitive -NULL_REPLACEMENT.NAME=null replacement +NULL_REPLACEMENT.NAME=Null replacement +FRACTION_SIZE_FLOAT.NAME=Fraction size (float) +FRACTION_SIZE_DOUBLE.NAME=Fraction size (double) + +NUMBER_FORMAT.NAME=Number format +NUMBER_FORMAT.ITEM.ENGLISH=English +NUMBER_FORMAT.ITEM.GERMAN=German +NUMBER_FORMAT.ITEM.US=US + +ENABLE_DATE_AS_MILLIS.NAME=Format date as millseconds +DATE_FORMAT.NAME=Date format + +ENABLE_STRING_QUOTING.NAME=Enable string quoting +QUOTE_CHARACTER.NAME=Quoting character + +CUT_OFF_TRAILING_OBJECTS.NAME=Cut off empty trailing hash input values + +EXPOSE_HASH_BASE.NAME=Expose hash input +HASH_BASE_OUTPUT_COLUMN.NAME=Column to display hash input NB_LINE.NAME=Count Rows HASH.NAME=Current hash value \ No newline at end of file diff --git a/talend_component/tHashRow/thashrow-1.4.jar b/talend_component/tHashRow/thashrow-1.4.jar new file mode 100644 index 0000000..db74a85 Binary files /dev/null and b/talend_component/tHashRow/thashrow-1.4.jar differ diff --git a/talend_jobs/sample.zip b/talend_jobs/sample.zip new file 
mode 100644 index 0000000..9fe20d8 Binary files /dev/null and b/talend_jobs/sample.zip differ diff --git a/talend_jobs/tHashRow.zip b/talend_jobs/tHashRow.zip new file mode 100644 index 0000000..1767107 Binary files /dev/null and b/talend_jobs/tHashRow.zip differ