Skip to content

Commit

Permalink
Refactor DefaultSplitCharacter#checkDatePattern to increase performance
Browse files Browse the repository at this point in the history
DEVSIX-4680
  • Loading branch information
ar3em committed Oct 27, 2020
1 parent 43bf5b7 commit 3072544
Show file tree
Hide file tree
Showing 3 changed files with 5,062 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
*/
public class DefaultSplitCharacter implements SplitCharacter {

private static final Pattern DATE_PATTERN = Pattern.compile("(\\d{2,4}-\\d{2}-\\d{2,4})");

/**
* An instance of the default SplitCharacter.
*/
Expand Down Expand Up @@ -154,11 +156,12 @@ protected char getCurrentCharacter(int current, char[] cc, PdfChunk[] ck) {
}

private char[] checkDatePattern(String data) {
String regex = "(\\d{2,4}-\\d{2}-\\d{2,4})";
Matcher m = Pattern.compile(regex).matcher(data);
if (m.find()) {
String tmpData = m.group(1).replace('-', '\u2011');
data = data.replaceAll(m.group(1), tmpData);
if (data.contains("-")) {
Matcher m = DATE_PATTERN.matcher(data);
if (m.find()) {
String tmpData = m.group(1).replace('-', '\u2011');
data = data.replaceAll(m.group(1), tmpData);
}
}
return data.toCharArray();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package com.itextpdf.text.pdf;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.MessageFormat;
import org.junit.Assert;
import org.junit.Test;

/**
 * Coarse performance guard: repeatedly runs
 * {@code DefaultSplitCharacter#isSplitCharacter} over a text fixture and fails
 * when the total run time exceeds {@link #TIME_LIMIT}.
 */
public class DefaultSplitCharacterProfilingTest {

    private static final String INPUT_DIR = "./src/test/resources/com/itextpdf/text/pdf/DefaultSplitCharacterProfilingTest/";

    private static final String CHECK_DATE_PATTERN_FAIL_MESSAGE =
            "The test verifies the optimization of the checkDatePattern method. This failure indicates that the optimization was broken.";

    private static final String READ_FILE_FAIL_MESSAGE = "Failed to read test file {0}. The test could not be completed.";

    /** Maximum accepted duration of the whole loop, in milliseconds. */
    private static final int TIME_LIMIT = 20000;

    /** Number of times the split check is invoked on the fixture text. */
    private static final int ITERATIONS = 70000;

    /**
     * Loads the fixture, invokes the split check {@link #ITERATIONS} times and
     * asserts that the elapsed time stays below {@link #TIME_LIMIT}.
     */
    @Test(timeout = 30000)
    public void checkDatePatternProfilingTest() {
        String testFile = INPUT_DIR + "profilingText.txt";
        String str = readFile(testFile);
        if (str == null) {
            Assert.fail(MessageFormat.format(READ_FILE_FAIL_MESSAGE, testFile));
        }
        // nanoTime is monotonic, so the measurement is not disturbed by wall-clock adjustments.
        long startTime = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            isSplitCharacter(str);
        }
        long time = (System.nanoTime() - startTime) / 1000000L;
        System.out.println("Test run time: " + time);
        Assert.assertTrue(CHECK_DATE_PATTERN_FAIL_MESSAGE, time < TIME_LIMIT);
    }

    /**
     * Runs a single split check on a fresh {@code DefaultSplitCharacter},
     * passing 0, 0, {@code text.length() + 1}, the characters of the text and
     * {@code null}. The result is discarded; only the cost of the call matters.
     *
     * @param text the text to run the check on
     */
    private static void isSplitCharacter(String text) {
        new DefaultSplitCharacter().isSplitCharacter(0, 0, text.length() + 1, text.toCharArray(), null);
    }

    /**
     * Reads the whole file into one string. Lines are concatenated without
     * their line terminators.
     *
     * @param fileName path of the file to read
     * @return the file content, or {@code null} when the file cannot be read
     */
    private static String readFile(String fileName) {
        StringBuilder stringBuilder = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));
            String line;
            while ((line = reader.readLine()) != null) {
                stringBuilder.append(line);
            }
            return stringBuilder.toString();
        } catch (IOException e) {
            // The caller reports the unreadable file through Assert.fail.
            return null;
        } finally {
            // Close on every path so a failed read does not leak the file handle.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the content was already read or abandoned.
                }
            }
        }
    }
}
Loading

0 comments on commit 3072544

Please sign in to comment.