From 39b41ae01f7a4c251ca7a109a483bbdff4bb059d Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Fri, 3 Jan 2025 08:43:58 -0700 Subject: [PATCH] feat(core): expanded `CompoundWords` to cover hyphenated cases --- harper-core/src/linting/compound_words.rs | 42 ++++++++++++++++++++--- harper-core/src/linting/matcher.rs | 1 - harper-core/src/linting/mod.rs | 13 ++++++- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/harper-core/src/linting/compound_words.rs b/harper-core/src/linting/compound_words.rs index 47d92533..b7d25491 100644 --- a/harper-core/src/linting/compound_words.rs +++ b/harper-core/src/linting/compound_words.rs @@ -29,6 +29,7 @@ impl Linter for CompoundWords { let mut lints = Vec::new(); let mut merged_word = CharString::new(); + let mut potential_compounds = Vec::new(); for (a, w, b) in document.tokens().tuple_windows() { if !a.kind.is_word() || !w.kind.is_whitespace() || !b.kind.is_word() { @@ -43,17 +44,39 @@ impl Linter for CompoundWords { continue; } + potential_compounds.clear(); + + merged_word.clear(); + merged_word.extend_from_slice(a_chars); + merged_word.extend_from_slice(b_chars); + + // Check for closed compound words + if self.dict.contains_word(&merged_word) { + potential_compounds.push(merged_word.clone()); + } + + // Check for hyphenated compound words merged_word.clear(); merged_word.extend_from_slice(a_chars); + merged_word.push('-'); merged_word.extend_from_slice(b_chars); + // Check for closed compound words if self.dict.contains_word(&merged_word) { + potential_compounds.push(merged_word.clone()); + } + + if !potential_compounds.is_empty() { lints.push(Lint { span: Span::new(a.span.start, b.span.end), lint_kind: LintKind::Spelling, - suggestions: vec![Suggestion::ReplaceWith(merged_word.to_vec())], - message: "These two words are often combined to form a closed compound word." - .to_owned(), + suggestions: potential_compounds + .drain(..) 
+ .map(|v| Suggestion::ReplaceWith(v.to_vec())) + .collect(), + message: + "These two words are often combined to form a hyphenated compound word." + .to_owned(), priority: 63, }); } @@ -69,7 +92,9 @@ impl Linter for CompoundWords { #[cfg(test)] mod tests { - use crate::linting::tests::assert_lint_count; + use crate::linting::tests::{ + assert_lint_count, assert_suggestion_count, assert_suggestion_result, + }; use super::CompoundWords; @@ -144,4 +169,13 @@ mod tests { 1, ); } + + #[test] + fn break_up() { + assert_suggestion_count( + "Like if you break up words you shouldn't.", + CompoundWords::default(), + 2, + ); + } } diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index cefd4c50..6e34785e 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -180,7 +180,6 @@ impl Matcher { "The","hing" => "The thing", "need","helps" => "need help", "an","this" => "and this", - "break","up" => "break-up", "case", "sensitive" => "case-sensitive", "Tree", "sitter" => "Tree-sitter", "all", "of", "the" => "all the", diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index 254d286a..bbd650b6 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -74,7 +74,7 @@ pub trait Linter: Send + Sync { #[cfg(test)] mod tests { use super::Linter; - use crate::Document; + use crate::{remove_overlaps, Document}; pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) { let test = Document::new_markdown_curated(text); @@ -83,6 +83,17 @@ mod tests { assert_eq!(lints.len(), count); } + /// Assert the total number of suggestions produced by a [`Linter`], spread across all produced + /// [`Lint`]s. 
+ pub fn assert_suggestion_count(text: &str, mut linter: impl Linter, count: usize) { + let test = Document::new_markdown_curated(text); + let lints = linter.lint(&test); + assert_eq!( + lints.iter().map(|l| l.suggestions.len()).sum::<usize>(), + count + ); + } + /// Runs a provided linter on text, applies the first suggestion from each /// lint and asserts that the result is equal to a given value. pub fn assert_suggestion_result(text: &str, mut linter: impl Linter, expected_result: &str) {