From 29248b30c5e61a9c7ef0d51f8565a23ce3018bbe Mon Sep 17 00:00:00 2001 From: Hannes Wellmann Date: Fri, 24 Mar 2023 19:38:54 +0100 Subject: [PATCH] Make comparison of newlines in text files more precise Don't ignore all kind of newlines when comparing text-files, only treat exactly matching \n and \r\n as equals. (cherry picked from commit 199647e71d0ceb01ee8e8fba1d4f05a5c0d1fe72) --- .../internal/TextComparator.java | 89 ++++++------ .../internal/TextComparatorTest.java | 136 ++++++++++++++++++ 2 files changed, 184 insertions(+), 41 deletions(-) create mode 100644 tycho-artifactcomparator/src/test/java/org/eclipse/tycho/zipcomparator/internal/TextComparatorTest.java diff --git a/tycho-artifactcomparator/src/main/java/org/eclipse/tycho/zipcomparator/internal/TextComparator.java b/tycho-artifactcomparator/src/main/java/org/eclipse/tycho/zipcomparator/internal/TextComparator.java index cff3185c6a..34b6589f3d 100644 --- a/tycho-artifactcomparator/src/main/java/org/eclipse/tycho/zipcomparator/internal/TextComparator.java +++ b/tycho-artifactcomparator/src/main/java/org/eclipse/tycho/zipcomparator/internal/TextComparator.java @@ -12,9 +12,9 @@ *******************************************************************************/ package org.eclipse.tycho.zipcomparator.internal; -import java.io.EOFException; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -36,6 +36,14 @@ public class TextComparator implements ContentsComparator { static final String HINT = "txt"; + private static final char CR = '\r'; + private static final char LF = '\n'; + + // Possible new lines: + // \n -- unix style + // \r\n -- windows style + // \r -- old Mac OS 9 style, recent Mac OS X/macOS use \n + @Override public ArtifactDelta getDelta(ComparatorInputStream baseline, ComparatorInputStream reactor, ComparisonData data) throws IOException { @@ -43,55 +51,54 @@ public ArtifactDelta getDelta(ComparatorInputStream baseline, ComparatorInputStr } public static ArtifactDelta compareText(ComparatorInputStream baseline, ComparatorInputStream reactor, - ComparisonData data) throws IOException { - ByteIterator baselineIterator = new ByteIterator(baseline.asBytes()); - ByteIterator reactorIterator = new ByteIterator(reactor.asBytes()); - while (baselineIterator.hasNext() && reactorIterator.hasNext()) { - if (baselineIterator.next() != reactorIterator.next()) { - return createDelta(ArtifactDelta.DEFAULT.getMessage(), baseline, reactor, data); - } - } - //now both need to be at the end of the stream if they are the same! - if (baselineIterator.hasNext() || reactorIterator.hasNext()) { + ComparisonData data) { + if (!isEqualTextIngoreNewLine(baseline.asBytes(), reactor.asBytes())) { return createDelta(ArtifactDelta.DEFAULT.getMessage(), baseline, reactor, data); } return ArtifactDelta.NO_DIFFERENCE; } - private static final class ByteIterator { - - private byte[] bytes; - private int index; - - public ByteIterator(byte[] bytes) { - this.bytes = bytes; - } - - byte next() throws EOFException { - if (hasNext()) { - byte b = bytes[index]; - index++; - return b; + /** + * Tests if {@code baseline} and {@code reactor} contain equal text, if line-endings are + * ignored. + * + * @implNote This methods is intended to have the same results as if the entire content of each + * array were read and compared line by line using BufferedReader.readLine(), which + * only returns the line content, without terminators. The actual implementation is + * just more efficient, because it does not create String objects for the entire + * content. + */ + public static boolean isEqualTextIngoreNewLine(byte[] baseline, byte[] reactor) { + int indexBaseline = 0; + int indexReactor = 0; + int mismatch = Arrays.mismatch(baseline, reactor); + while (mismatch >= 0) { + indexBaseline += mismatch; + indexReactor += mismatch; + int baselineNewLine = newLineLength(baseline, indexBaseline); + int reactorNewLine = newLineLength(reactor, indexReactor); + if (baselineNewLine < 0 || reactorNewLine < 0) { + return false; } - throw new EOFException(); - } - - boolean hasNext() { - skipNewLines(); - return index < bytes.length; + // Both sliders are at either "\n" or "\r\n" + indexBaseline += baselineNewLine; + indexReactor += reactorNewLine; + mismatch = Arrays.mismatch(baseline, indexBaseline, baseline.length, reactor, indexReactor, reactor.length); } + return true; + } - private void skipNewLines() { - while (index < bytes.length) { - byte b = bytes[index]; - if (b == '\n' || b == '\r') { - index++; - continue; - } - return; + private static int newLineLength(byte[] bytes, int index) { + if (index < bytes.length) { + if (bytes[index] == LF + // Prevent "\r\n" and "\r\r\n" from being treated as equals + && (index == 0 || bytes[index - 1] != CR)) { + return 1; + } else if (bytes[index] == CR) { + return index + 1 < bytes.length && bytes[index + 1] == LF ? 2 : 1; } } - + return -1; } @Override @@ -109,7 +116,7 @@ public static ArtifactDelta createDelta(String message, ComparatorInputStream ba Patch patch = DiffUtils.diff(source, target); List unifiedDiffList = UnifiedDiffUtils.generateUnifiedDiff("baseline", "reactor", source, patch, 0); - detailed = unifiedDiffList.stream().collect(Collectors.joining((System.lineSeparator()))); + detailed = unifiedDiffList.stream().collect(Collectors.joining(System.lineSeparator())); } catch (Exception e) { detailed = message; } diff --git a/tycho-artifactcomparator/src/test/java/org/eclipse/tycho/zipcomparator/internal/TextComparatorTest.java b/tycho-artifactcomparator/src/test/java/org/eclipse/tycho/zipcomparator/internal/TextComparatorTest.java new file mode 100644 index 0000000000..b13d66613b --- /dev/null +++ b/tycho-artifactcomparator/src/test/java/org/eclipse/tycho/zipcomparator/internal/TextComparatorTest.java @@ -0,0 +1,136 @@ +package org.eclipse.tycho.zipcomparator.internal; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.eclipse.tycho.artifactcomparator.ArtifactComparator.ComparisonData; +import org.eclipse.tycho.artifactcomparator.ArtifactDelta; +import org.eclipse.tycho.artifactcomparator.ComparatorInputStream; +import org.junit.Test; + +public class TextComparatorTest { + private static final String NL = System.lineSeparator(); + + @Test + public void testEqualText() throws IOException { + String text = "FirstLine\nline2\n"; + assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(text, text)); + } + + @Test + public void testNotEqualText() throws IOException { + String baseline = "FirstLine\nline2\n"; + String reactor = "line1\nline2\n"; + + ArtifactDelta delta = getTextDelta(baseline, reactor); + assertDeltaWithDetails( + "--- baseline" + NL + "+++ reactor" + NL + "@@ -1,1 +1,1 @@" + NL + "-FirstLine" + NL + "+line1", + delta); + } + + @Test + public void testEqualTextWhenIgnoringLineEndings() throws IOException { + { + String baseline = "FirstLine\r\nline2\r\nline3"; + String reactor = "FirstLine\nline2\nline3"; + assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor)); + } + { + String baseline = "\r\nFirstLine\r\nline2\r\n"; + String reactor = "\nFirstLine\nline2\n"; + assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor)); + } + { + String baseline = "\r\n\r\nFirstLine\r\n\r\nline2\r\n\r\n"; + String reactor = "\n\nFirstLine\n\nline2\n\n"; + assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor)); + } + { // mixed styles in one string + String baseline = "\n\r\nFirstLine\r\n\nline2\n\r\n"; + String reactor = "\n\nFirstLine\n\nline2\n\n"; + assertEquals(ArtifactDelta.NO_DIFFERENCE, getTextDelta(baseline, reactor)); + } + } + + @Test + public void testNotEqualTextWithDifferentCRandLFcombinations() throws IOException { + + { + String baseline = "line1\n\rline2"; + String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-"; + + String reactor1 = "line1\nline2"; + ArtifactDelta delta1 = getTextDelta(baseline, reactor1); + assertDeltaWithDetails(expectedDelta, delta1); + + String reactor2 = "line1\r\nline2"; + ArtifactDelta delta2 = getTextDelta(baseline, reactor2); + assertDeltaWithDetails(expectedDelta, delta2); + } + { + String baseline = "line1\r\n\nline2"; + String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-"; + + String reactor1 = "line1\nline2"; + ArtifactDelta delta1 = getTextDelta(baseline, reactor1); + assertDeltaWithDetails(expectedDelta, delta1); + + String reactor2 = "line1\r\nline2"; + ArtifactDelta delta2 = getTextDelta(baseline, reactor2); + assertDeltaWithDetails(expectedDelta, delta2); + } + { + String baseline = "line1\r\r\nline2"; + String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -2,1 +2,0 @@" + NL + "-"; + + String reactor1 = "line1\nline2"; + ArtifactDelta delta1 = getTextDelta(baseline, reactor1); + assertDeltaWithDetails(expectedDelta, delta1); + + String reactor2 = "line1\r\nline2"; + ArtifactDelta delta2 = getTextDelta(baseline, reactor2); + assertDeltaWithDetails(expectedDelta, delta2); + } + { + String baseline = "\r\nline1\r\nline2"; + String expectedDelta = "--- baseline" + NL + "+++ reactor" + NL + "@@ -1,1 +1,0 @@" + NL + "-"; + + String reactor1 = "line1\nline2"; + ArtifactDelta delta1 = getTextDelta(baseline, reactor1); + assertDeltaWithDetails(expectedDelta, delta1); + + String reactor2 = "line1\r\nline2"; + ArtifactDelta delta2 = getTextDelta(baseline, reactor2); + assertDeltaWithDetails(expectedDelta, delta2); + } + { + String baseline = "line1\r\nline2\r\n"; + String expectedDelta = ""; //BufferedReader.readLine() considers a trailing newline equals to EOF + + String reactor1 = "line1\nline2"; + ArtifactDelta delta1 = getTextDelta(baseline, reactor1); + assertDeltaWithDetails(expectedDelta, delta1); + + String reactor2 = "line1\r\nline2"; + ArtifactDelta delta2 = getTextDelta(baseline, reactor2); + assertDeltaWithDetails(expectedDelta, delta2); + } + } + + private static ArtifactDelta getTextDelta(String baseline, String reactor) throws IOException { + ComparisonData data = new ComparisonData(List.of(), false, /* Show diff details: */ true); + ComparatorInputStream baselineStream = new ComparatorInputStream(baseline.getBytes(StandardCharsets.UTF_8)); + ComparatorInputStream reactorStream = new ComparatorInputStream(reactor.getBytes(StandardCharsets.UTF_8)); + return TextComparator.compareText(baselineStream, reactorStream, data); + } + + private static void assertDeltaWithDetails(String expected, ArtifactDelta delta) { + assertNotEquals(ArtifactDelta.NO_DIFFERENCE, delta); + assertEquals(expected, delta.getDetailedMessage()); + } + +}