Skip to content

Commit

Permalink
Write Brief: Fix highlight position on spelling mistake following ignored special word (#39282)
Browse files Browse the repository at this point in the history

* add slash to special characters

* fix highlight position

* changelog

* fix slash handling
  • Loading branch information
dhasilva authored Sep 9, 2024
1 parent 1e3851d commit 6edf7ea
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: bugfix

Write Brief: Fix highlight position on spelling mistake following ignored special word
Original file line number Diff line number Diff line change
Expand Up @@ -180,32 +180,39 @@ export default function spellingMistakes( text: string ): Array< HighlightedText
return highlightedTexts;
}

// Regex to match words, including contractions and hyphenated words, possibly prefixed with special characters
// \p{L} is a Unicode property that matches any letter in any language
// \p{M} is a Unicode property that matches any character intended to be combined with another character
const wordRegex = new RegExp( /[@#+$]{0,1}[\p{L}\p{M}'-]+/, 'gu' );
const words = ( text.match( wordRegex ) || [] )
// Filter out words that start with special characters
.filter( word => [ '@', '#', '+', '$' ].indexOf( word[ 0 ] ) === -1 )
// Split hyphenated words into separate words as nspell doesn't work well with them
.map( word => word.split( '-' ) )
.flat();

// To avoid highlighting the same word occurrence multiple times
let searchStartIndex = 0;

words.forEach( ( word: string ) => {
const wordIndex = text.indexOf( word, searchStartIndex );

if ( ! spellChecker.correct( word ) ) {
highlightedTexts.push( {
text: word,
startIndex: wordIndex,
endIndex: wordIndex + word.length,
} );
// Regex to match words, including contractions, hyphenated words, and words separated by slashes
// \p{L} matches any Unicode letter in any language
// \p{M} matches any Unicode mark (combining characters)
// The regex has three main parts:
// 1. [@#+$/]{0,1} - Optionally matches a single special character at the start
// 2. [\p{L}\p{M}'-]+ - Matches one or more letters, marks, apostrophes, or hyphens
// 3. (?:\/[\p{L}\p{M}'-]+)* - Optionally matches additional parts separated by slashes
const wordRegex = new RegExp( /[@#+$/]{0,1}[\p{L}\p{M}'-]+(?:\/[\p{L}\p{M}'-]+)*/gu );
const matches = Array.from( text.matchAll( wordRegex ) );

matches.forEach( match => {
const word = match[ 0 ];
const startIndex = match.index as number;

// Skip words that start with special characters
if ( [ '@', '#', '+', '$', '/' ].indexOf( word[ 0 ] ) !== -1 ) {
return;
}

searchStartIndex = wordIndex + word.length;
// Split words by hyphens and slashes
const subWords = word.split( /[-/]/ );

subWords.forEach( subWord => {
if ( ! spellChecker.correct( subWord ) ) {
const subWordStartIndex = startIndex + word.indexOf( subWord );

highlightedTexts.push( {
text: subWord,
startIndex: subWordStartIndex,
endIndex: subWordStartIndex + subWord.length,
} );
}
} );
} );

return highlightedTexts;
Expand Down

0 comments on commit 6edf7ea

Please sign in to comment.