Skip to content

Commit

Permalink
Handle surrogate pairs in diff
Browse files Browse the repository at this point in the history
  • Loading branch information
KentoMoriwaki committed Oct 25, 2022
1 parent 7e43d18 commit 51ab65b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
18 changes: 18 additions & 0 deletions diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ import { equalityStrict } from './function.js'
* @template T
*/

// Matches one UTF-16 code unit in the high (leading) surrogate range U+D800-U+DBFF.
// The string diff loops test the last matched unit against this when scanning from
// the left, so that a common prefix never ends in the middle of a surrogate pair.
const highSurrogateRegex = /[\uD800-\uDBFF]/
// Matches one UTF-16 code unit in the low (trailing) surrogate range U+DC00-U+DFFF.
// The string diff loops test the last matched unit against this when scanning from
// the right, so that a common suffix never starts in the middle of a surrogate pair.
const lowSurrogateRegex = /[\uDC00-\uDFFF]/

/**
* Create a diff between two strings. This diff implementation is highly
* efficient, but not very sophisticated.
Expand All @@ -41,12 +44,18 @@ import { equalityStrict } from './function.js'
export const simpleDiffString = (a, b) => {
let left = 0 // number of same characters counting from left
let right = 0 // number of same characters counting from right
let inSurrogate = false
while (left < a.length && left < b.length && a[left] === b[left]) {
inSurrogate = highSurrogateRegex.test(a[left])
left++
}
if (inSurrogate) left--
inSurrogate = false
while (right + left < a.length && right + left < b.length && a[a.length - right - 1] === b[b.length - right - 1]) {
inSurrogate = lowSurrogateRegex.test(a[a.length - right - 1])
right++
}
if (inSurrogate) right--
return {
index: left,
remove: a.length - left - right,
Expand Down Expand Up @@ -103,30 +112,39 @@ export const simpleDiffStringWithCursor = (a, b, cursor) => {
let right = 0 // number of same characters counting from right
// Iterate left to the right until we find a changed character
// First iteration considers the current cursor position
let inSurrogate = false
while (
left < a.length &&
left < b.length &&
a[left] === b[left] &&
left < cursor
) {
inSurrogate = highSurrogateRegex.test(a[left])
left++
}
if (inSurrogate) left--
inSurrogate = false
// Iterate right to the left until we find a changed character
while (
right + left < a.length &&
right + left < b.length &&
a[a.length - right - 1] === b[b.length - right - 1]
) {
inSurrogate = lowSurrogateRegex.test(a[a.length - right - 1])
right++
}
if (inSurrogate) right--
inSurrogate = false
// Try to iterate left further to the right without caring about the current cursor position
while (
right + left < a.length &&
right + left < b.length &&
a[left] === b[left]
) {
inSurrogate = highSurrogateRegex.test(a[left])
left++
}
if (inSurrogate) left--
return {
index: left,
remove: a.length - left - right,
Expand Down
18 changes: 15 additions & 3 deletions diff.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import { simpleDiffString, simpleDiffArray, simpleDiffStringWithCursor } from '.
import * as prng from './prng.js'
import * as f from './function.js'
import * as t from './testing.js'
import * as object from './object.js'
import * as str from './string.js'

/**
Expand All @@ -14,8 +13,12 @@ function runDiffTest (a, b, expected) {
const result = simpleDiffString(a, b)
t.compare(result, expected)
t.compare(result, simpleDiffStringWithCursor(a, b, a.length)) // check that the withCursor approach returns the same result
const arrResult = simpleDiffArray(a.split(''), b.split(''))
t.compare(arrResult, object.assign({}, result, { insert: result.insert.split('') }))
const recomposed = str.splice(a, result.index, result.remove, result.insert)
t.compareStrings(recomposed, b)
const arrResult = simpleDiffArray(Array.from(a), Array.from(b))
const arrRecomposed = Array.from(a)
arrRecomposed.splice(arrResult.index, arrResult.remove, ...arrResult.insert)
t.compareStrings(arrRecomposed.join(''), b)
}

/**
Expand All @@ -30,6 +33,8 @@ export const testDiffing = tc => {
runDiffTest('abc', 'xyz', { index: 0, remove: 3, insert: 'xyz' })
runDiffTest('axz', 'au', { index: 1, remove: 2, insert: 'u' })
runDiffTest('ax', 'axy', { index: 2, remove: 0, insert: 'y' })
runDiffTest('\u{d83d}\u{dc77}'/* '👷' */, '\u{d83d}\u{dea7}\u{d83d}\u{dc77}'/* '🚧👷' */, { index: 0, remove: 0, insert: '🚧' })
runDiffTest('\u{d83d}\u{dea7}\u{d83d}\u{dc77}'/* '🚧👷' */, '\u{d83d}\u{dc77}'/* '👷' */, { index: 0, remove: 2, insert: '' })
}

/**
Expand Down Expand Up @@ -74,6 +79,13 @@ export const testSimpleDiffWithCursor = tc => {
const recomposed = str.splice(initial, change.index, change.remove, change.insert)
t.compareStrings(expected, recomposed)
}
{
const initial = '🚧🚧🚧'
const change = simpleDiffStringWithCursor(initial, '🚧🚧', 2) // Should delete after the midst of 🚧
t.compare(change, { insert: '', remove: 2, index: 2 })
const recomposed = str.splice(initial, change.index, change.remove, change.insert)
t.compareStrings('🚧🚧', recomposed)
}
}

/**
Expand Down

0 comments on commit 51ab65b

Please sign in to comment.