diff --git a/diff.js b/diff.js index 1eac2cf..fdaada2 100644 --- a/diff.js +++ b/diff.js @@ -28,6 +28,9 @@ import { equalityStrict } from './function.js' * @template T */ +const highSurrogateRegex = /[\uD800-\uDBFF]/ +const lowSurrogateRegex = /[\uDC00-\uDFFF]/ + /** * Create a diff between two strings. This diff implementation is highly * efficient, but not very sophisticated. @@ -41,12 +44,18 @@ import { equalityStrict } from './function.js' export const simpleDiffString = (a, b) => { let left = 0 // number of same characters counting from left let right = 0 // number of same characters counting from right + let inSurrogate = false while (left < a.length && left < b.length && a[left] === b[left]) { + inSurrogate = highSurrogateRegex.test(a[left]) left++ } + if (inSurrogate) left-- + inSurrogate = false while (right + left < a.length && right + left < b.length && a[a.length - right - 1] === b[b.length - right - 1]) { + inSurrogate = lowSurrogateRegex.test(a[a.length - right - 1]) right++ } + if (inSurrogate) right-- return { index: left, remove: a.length - left - right, @@ -103,30 +112,39 @@ export const simpleDiffStringWithCursor = (a, b, cursor) => { let right = 0 // number of same characters counting from right // Iterate left to the right until we find a changed character // First iteration considers the current cursor position + let inSurrogate = false while ( left < a.length && left < b.length && a[left] === b[left] && left < cursor ) { + inSurrogate = highSurrogateRegex.test(a[left]) left++ } + if (inSurrogate) left-- + inSurrogate = false // Iterate right to the left until we find a changed character while ( right + left < a.length && right + left < b.length && a[a.length - right - 1] === b[b.length - right - 1] ) { + inSurrogate = lowSurrogateRegex.test(a[a.length - right - 1]) right++ } + if (inSurrogate) right-- + inSurrogate = false // Try to iterate left further to the right without caring about the current cursor position while ( right + left < a.length && right + left < b.length && a[left] === b[left] ) { + inSurrogate = highSurrogateRegex.test(a[left]) left++ } + if (inSurrogate) left-- return { index: left, remove: a.length - left - right, diff --git a/diff.test.js b/diff.test.js index e5881a6..7e85b37 100644 --- a/diff.test.js +++ b/diff.test.js @@ -2,7 +2,6 @@ import { simpleDiffString, simpleDiffArray, simpleDiffStringWithCursor } from '. import * as prng from './prng.js' import * as f from './function.js' import * as t from './testing.js' -import * as object from './object.js' import * as str from './string.js' /** @@ -14,8 +13,12 @@ function runDiffTest (a, b, expected) { const result = simpleDiffString(a, b) t.compare(result, expected) t.compare(result, simpleDiffStringWithCursor(a, b, a.length)) // check that the withCursor approach returns the same result - const arrResult = simpleDiffArray(a.split(''), b.split('')) - t.compare(arrResult, object.assign({}, result, { insert: result.insert.split('') })) + const recomposed = str.splice(a, result.index, result.remove, result.insert) + t.compareStrings(recomposed, b) + const arrResult = simpleDiffArray(Array.from(a), Array.from(b)) + const arrRecomposed = Array.from(a) + arrRecomposed.splice(arrResult.index, arrResult.remove, ...arrResult.insert) + t.compareStrings(arrRecomposed.join(''), b) } /** @@ -30,6 +33,8 @@ export const testDiffing = tc => { runDiffTest('abc', 'xyz', { index: 0, remove: 3, insert: 'xyz' }) runDiffTest('axz', 'au', { index: 1, remove: 2, insert: 'u' }) runDiffTest('ax', 'axy', { index: 2, remove: 0, insert: 'y' }) + runDiffTest('\u{d83d}\u{dc77}'/* '👷' */, '\u{d83d}\u{dea7}\u{d83d}\u{dc77}'/* '🚧👷' */, { index: 0, remove: 0, insert: '🚧' }) + runDiffTest('\u{d83d}\u{dea7}\u{d83d}\u{dc77}'/* '🚧👷' */, '\u{d83d}\u{dc77}'/* '👷' */, { index: 0, remove: 2, insert: '' }) } /** @@ -74,6 +79,13 @@ export const testSimpleDiffWithCursor = tc => { const recomposed = str.splice(initial, change.index, change.remove, change.insert) t.compareStrings(expected, recomposed) } + { + const initial = '🚧🚧🚧' + const change = simpleDiffStringWithCursor(initial, '🚧🚧', 2) // Should delete after the midst of 🚧 + t.compare(change, { insert: '', remove: 2, index: 2 }) + const recomposed = str.splice(initial, change.index, change.remove, change.insert) + t.compareStrings('🚧🚧', recomposed) + } } /**