From 0f9129cc5589e0bbb6ac78cdb4b8dbc69458774b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20=C4=8C=C3=AD=C5=BEek?=
Date: Mon, 15 Mar 2021 17:18:51 +0100
Subject: [PATCH] Avoid match-at-end regexp bottleneck in replacement to output joining. Fix #370.

---
 src/turndown.js  | 30 +++++++++---------------------
 src/utilities.js | 11 +++++++++++
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/turndown.js b/src/turndown.js
index d65fc348..ff4a2cb3 100644
--- a/src/turndown.js
+++ b/src/turndown.js
@@ -1,11 +1,9 @@
 import COMMONMARK_RULES from './commonmark-rules'
 import Rules from './rules'
-import { extend } from './utilities'
+import { extend, trimLeadingNewlines, trimTrailingNewlines } from './utilities'
 import RootNode from './root-node'
 import Node from './node'
 var reduce = Array.prototype.reduce
-var leadingNewLinesRegExp = /^\n*/
-var trailingNewLinesRegExp = /\n*$/
 var escapes = [
   [/\\/g, '\\\\'],
   [/\*/g, '\\*'],
@@ -212,31 +210,21 @@ function replacementForNode (node) {
 }
 
 /**
- * Determines the new lines between the current output and the replacement
+ * Joins replacement to the current output with appropriate number of new lines
  * @private
  * @param {String} output The current conversion output
  * @param {String} replacement The string to append to the output
- * @returns The whitespace to separate the current output and the replacement
+ * @returns Joined output
  * @type String
  */
 
-function separatingNewlines (output, replacement) {
-  var newlines = [
-    output.match(trailingNewLinesRegExp)[0],
-    replacement.match(leadingNewLinesRegExp)[0]
-  ].sort()
-  var maxNewlines = newlines[newlines.length - 1]
-  return maxNewlines.length < 2 ? maxNewlines : '\n\n'
-}
-
-function join (string1, string2) {
-  var separator = separatingNewlines(string1, string2)
-
-  // Remove trailing/leading newlines and replace with separator
-  string1 = string1.replace(trailingNewLinesRegExp, '')
-  string2 = string2.replace(leadingNewLinesRegExp, '')
+function join (output, replacement) {
+  var s1 = trimTrailingNewlines(output)
+  var s2 = trimLeadingNewlines(replacement)
+  var nls = Math.max(output.length - s1.length, replacement.length - s2.length)
+  var separator = '\n\n'.substring(0, nls)
 
-  return string1 + separator + string2
+  return s1 + separator + s2
 }
 
 /**
diff --git a/src/utilities.js b/src/utilities.js
index 50c3b09b..36f0acce 100644
--- a/src/utilities.js
+++ b/src/utilities.js
@@ -12,6 +12,17 @@ export function repeat (character, count) {
   return Array(count + 1).join(character)
 }
 
+export function trimLeadingNewlines (string) {
+  return string.replace(/^\n*/, '')
+}
+
+export function trimTrailingNewlines (string) {
+  // avoid match-at-end regexp bottleneck, see #370
+  var indexEnd = string.length
+  while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--
+  return string.substring(0, indexEnd)
+}
+
 export var blockElements = [
   'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
   'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',