diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 73b7d45d..bd2b7b24 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -7,7 +7,7 @@ use ariadne::{Color, Label, Report, ReportKind, Source}; use clap::Parser; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; -use harper_core::parsers::Markdown; +use harper_core::parsers::{Markdown, MarkdownOptions}; use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary}; #[derive(Debug, Parser)] @@ -32,12 +32,14 @@ enum Args { fn main() -> anyhow::Result<()> { let args = Args::parse(); + let markdown_options = MarkdownOptions::default(); + let linting_options = LintGroupConfig::default(); match args { Args::Lint { file, count } => { - let (doc, source) = load_file(&file)?; + let (doc, source) = load_file(&file, markdown_options)?; - let mut linter = LintGroup::new(LintGroupConfig::default(), FstDictionary::curated()); + let mut linter = LintGroup::new(linting_options, FstDictionary::curated()); let mut lints = linter.lint(&doc); if count { @@ -75,7 +77,7 @@ fn main() -> anyhow::Result<()> { std::process::exit(1); } Args::Parse { file } => { - let (doc, _) = load_file(&file)?; + let (doc, _) = load_file(&file, markdown_options)?; for token in doc.tokens() { let json = serde_json::to_string(&token)?; @@ -101,15 +103,15 @@ fn main() -> anyhow::Result<()> { } } -fn load_file(file: &Path) -> anyhow::Result<(Document, String)> { +fn load_file(file: &Path, markdown_options: MarkdownOptions) -> anyhow::Result<(Document, String)> { let source = std::fs::read_to_string(file)?; let mut parser: Box = if let Some("md") = file.extension().map(|v| v.to_str().unwrap()) { - Box::new(Markdown) + Box::new(Markdown::new(markdown_options)) } else { Box::new( - CommentParser::new_from_filename(file) + CommentParser::new_from_filename(file, Markdown::new(markdown_options)) .map(Box::new) .ok_or(format_err!("Could not detect language ID."))?, ) diff --git a/harper-comments/src/comment_parser.rs b/harper-comments/src/comment_parser.rs index 54536862..5f93706b 100644 --- a/harper-comments/src/comment_parser.rs +++ b/harper-comments/src/comment_parser.rs @@ -1,7 +1,7 @@ use std::path::Path; use comment_parsers::{Go, JavaDoc, JsDoc, Unit}; -use harper_core::parsers::{self, Parser}; +use harper_core::parsers::{self, Markdown, Parser}; use harper_core::{FullDictionary, Token}; use harper_tree_sitter::TreeSitterMasker; use tree_sitter::Node; @@ -17,7 +17,7 @@ impl CommentParser { self.inner.masker.create_ident_dict(source) } - pub fn new_from_language_id(language_id: &str) -> Option { + pub fn new_from_language_id(language_id: &str, markdown_parser: Markdown) -> Option { let language = match language_id { "rust" => tree_sitter_rust::language(), "typescriptreact" => tree_sitter_typescript::language_tsx(), @@ -40,10 +40,12 @@ impl CommentParser { }; let comment_parser: Box = match language_id { - "javascriptreact" | "typescript" | "typescriptreact" | "javascript" => Box::new(JsDoc), + "javascriptreact" | "typescript" | "typescriptreact" | "javascript" => { + Box::new(JsDoc::new(markdown_parser)) + } "java" => Box::new(JavaDoc::default()), - "go" => Box::new(Go), - _ => Box::new(Unit), + "go" => Box::new(Go::new(markdown_parser)), + _ => Box::new(Unit::new(markdown_parser)), }; Some(Self { @@ -55,8 +57,8 @@ impl CommentParser { } /// Infer the programming language from a provided filename. - pub fn new_from_filename(filename: &Path) -> Option { - Self::new_from_language_id(Self::filename_to_filetype(filename)?) + pub fn new_from_filename(filename: &Path, markdown_parser: Markdown) -> Option { + Self::new_from_language_id(Self::filename_to_filetype(filename)?, markdown_parser) } /// Convert a provided path to a corresponding Language Server Protocol file diff --git a/harper-comments/src/comment_parsers/go.rs b/harper-comments/src/comment_parsers/go.rs index 0c90b7d8..020df3d4 100644 --- a/harper-comments/src/comment_parsers/go.rs +++ b/harper-comments/src/comment_parsers/go.rs @@ -3,8 +3,16 @@ use harper_core::Token; use super::without_initiators; -#[derive(Debug, Clone, Copy)] -pub struct Go; +#[derive(Clone)] +pub struct Go { + markdown_parser: Markdown, +} + +impl Go { + pub fn new(markdown_parser: Markdown) -> Self { + Self { markdown_parser } + } +} impl Parser for Go { fn parse(&mut self, source: &[char]) -> Vec { @@ -25,9 +33,7 @@ impl Parser for Go { actual_source = new_source } - let mut markdown_parser = Markdown; - - let mut new_tokens = markdown_parser.parse(actual_source); + let mut new_tokens = self.markdown_parser.parse(actual_source); new_tokens .iter_mut() diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 33e4858a..19766114 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -4,7 +4,16 @@ use itertools::Itertools; use super::without_initiators; -pub struct JsDoc; +#[derive(Clone)] +pub struct JsDoc { + markdown_parser: Markdown, +} + +impl JsDoc { + pub fn new(markdown_parser: Markdown) -> Self { + Self { markdown_parser } + } +} impl Parser for JsDoc { fn parse(&mut self, source: &[char]) -> Vec { @@ -13,7 +22,7 @@ impl Parser for JsDoc { let mut chars_traversed = 0; for line in source.split(|c| *c == '\n') { - let mut new_tokens = parse_line(line); + let mut new_tokens = parse_line(line, &mut self.markdown_parser); if chars_traversed + line.len() < source.len() { new_tokens.push(Token::new( @@ -34,7 +43,7 @@ impl Parser for JsDoc { } } -fn parse_line(source: &[char]) -> Vec { +fn parse_line(source: &[char], markdown_parser: &mut Markdown) -> Vec { let actual_line = without_initiators(source); if actual_line.is_empty() { @@ -43,7 +52,7 @@ fn parse_line(source: &[char]) -> Vec { let source_line = actual_line.get_content(source); - let mut new_tokens = Markdown.parse(source_line); + let mut new_tokens = markdown_parser.parse(source_line); // Handle inline tags mark_inline_tags(&mut new_tokens); @@ -148,21 +157,23 @@ fn parse_inline_tag(tokens: &[Token]) -> Option { #[cfg(test)] mod tests { - use harper_core::{Document, Punctuation, TokenKind}; + use harper_core::{parsers::Markdown, Document, Punctuation, TokenKind}; use crate::CommentParser; #[test] fn escapes_loop() { let source = "/** This should _not_cause an infinite loop: {@ */"; - let mut parser = CommentParser::new_from_language_id("javascript").unwrap(); + let mut parser = + CommentParser::new_from_language_id("javascript", Markdown::default()).unwrap(); Document::new_curated(source, &mut parser); } #[test] fn handles_inline_link() { let source = "/** See {@link MyClass} and [MyClass's foo property]{@link MyClass#foo}. */"; - let mut parser = CommentParser::new_from_language_id("javascript").unwrap(); + let mut parser = + CommentParser::new_from_language_id("javascript", Markdown::default()).unwrap(); let document = Document::new_curated(source, &mut parser); assert!(matches!( @@ -206,7 +217,8 @@ mod tests { #[test] fn handles_class() { let source = "/** @class Circle representing a circle. */"; - let mut parser = CommentParser::new_from_language_id("javascript").unwrap(); + let mut parser = + CommentParser::new_from_language_id("javascript", Markdown::default()).unwrap(); let document = Document::new_curated(source, &mut parser); assert!(document diff --git a/harper-comments/src/comment_parsers/unit.rs b/harper-comments/src/comment_parsers/unit.rs index 324225ca..97af2869 100644 --- a/harper-comments/src/comment_parsers/unit.rs +++ b/harper-comments/src/comment_parsers/unit.rs @@ -9,7 +9,16 @@ use super::without_initiators; /// /// It assumes it is being provided a single line of comment at a time, /// including the comment initiation characters. -pub struct Unit; +#[derive(Clone)] +pub struct Unit { + markdown_parser: Markdown, +} + +impl Unit { + pub fn new(markdown_parser: Markdown) -> Self { + Self { markdown_parser } + } +} impl Parser for Unit { fn parse(&mut self, source: &[char]) -> Vec { @@ -28,7 +37,7 @@ impl Parser for Unit { continue; } - let mut new_tokens = parse_line(line); + let mut new_tokens = parse_line(line, &mut self.markdown_parser); if chars_traversed + line.len() < source.len() { new_tokens.push(Token::new( @@ -49,7 +58,7 @@ impl Parser for Unit { } } -fn parse_line(source: &[char]) -> Vec { +fn parse_line(source: &[char], markdown_parser: &mut Markdown) -> Vec { let actual = without_initiators(source); if actual.is_empty() { @@ -58,8 +67,6 @@ fn parse_line(source: &[char]) -> Vec { let source = actual.get_content(source); - let mut markdown_parser = Markdown; - let mut new_tokens = markdown_parser.parse(source); new_tokens diff --git a/harper-comments/tests/language_support.rs b/harper-comments/tests/language_support.rs index ca9df0a3..df04a5ea 100644 --- a/harper-comments/tests/language_support.rs +++ b/harper-comments/tests/language_support.rs @@ -2,6 +2,7 @@ use std::path::Path; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; +use harper_core::parsers::Markdown; use harper_core::{Document, FstDictionary}; /// Creates a unit test checking that the linting of a source file in @@ -20,7 +21,7 @@ macro_rules! create_test { ) ); - let mut parser = CommentParser::new_from_filename(Path::new(filename)).unwrap(); + let mut parser = CommentParser::new_from_filename(Path::new(filename), Markdown::default()).unwrap(); let dict = FstDictionary::curated(); let document = Document::new(&source, &mut parser, &dict); diff --git a/harper-core/benches/parse_demo.rs b/harper-core/benches/parse_demo.rs index d7626532..09221586 100644 --- a/harper-core/benches/parse_demo.rs +++ b/harper-core/benches/parse_demo.rs @@ -1,19 +1,20 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; +use harper_core::parsers::MarkdownOptions; use harper_core::{Document, FstDictionary}; static DEMO: &str = include_str!("../../demo.md"); fn parse_demo(c: &mut Criterion) { c.bench_function("parse_demo", |b| { - b.iter(|| Document::new_markdown_curated(black_box(DEMO))) + b.iter(|| Document::new_markdown_curated(black_box(DEMO), MarkdownOptions::default())) }); } fn lint_demo(c: &mut Criterion) { let dictionary = FstDictionary::curated(); let mut lint_set = LintGroup::new(Default::default(), dictionary); - let document = Document::new_markdown_curated(black_box(DEMO)); + let document = Document::new_markdown_curated(black_box(DEMO), MarkdownOptions::default()); c.bench_function("lint_demo", |b| { b.iter(|| lint_set.lint(&document)); @@ -25,7 +26,8 @@ fn lint_demo_uncached(c: &mut Criterion) { b.iter(|| { let dictionary = FstDictionary::curated(); let mut lint_set = LintGroup::new(LintGroupConfig::default(), dictionary.clone()); - let document = Document::new_markdown(black_box(DEMO), &dictionary); + let document = + Document::new_markdown(black_box(DEMO), MarkdownOptions::default(), &dictionary); lint_set.lint(&document) }) }); diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index c6adce90..34dab4d7 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -4,7 +4,7 @@ use std::fmt::Display; use paste::paste; -use crate::parsers::{Markdown, Parser, PlainEnglish}; +use crate::parsers::{Markdown, MarkdownOptions, Parser, PlainEnglish}; use crate::patterns::{PatternExt, RepeatingPattern, SequencePattern}; use crate::punctuation::Punctuation; use crate::token::NumberSuffix; @@ -71,14 +71,22 @@ impl Document { /// Parse text to produce a document using the built-in [`Markdown`] parser /// and curated dictionary. - pub fn new_markdown_curated(text: &str) -> Self { - Self::new(text, &mut Markdown, &FstDictionary::curated()) + pub fn new_markdown_curated(text: &str, markdown_options: MarkdownOptions) -> Self { + Self::new( + text, + &mut Markdown::new(markdown_options), + &FstDictionary::curated(), + ) } /// Parse text to produce a document using the built-in [`PlainEnglish`] /// parser and the curated dictionary. - pub fn new_markdown(text: &str, dictionary: &impl Dictionary) -> Self { - Self::new(text, &mut Markdown, dictionary) + pub fn new_markdown( + text: &str, + markdown_options: MarkdownOptions, + dictionary: &impl Dictionary, + ) -> Self { + Self::new(text, &mut Markdown::new(markdown_options), dictionary) } /// Re-parse important language constructs. @@ -553,14 +561,14 @@ mod tests { use itertools::Itertools; use super::Document; - use crate::Span; + use crate::{parsers::MarkdownOptions, Span}; fn assert_condensed_contractions(text: &str, final_tok_count: usize) { let document = Document::new_plain_english_curated(text); assert_eq!(document.tokens.len(), final_tok_count); - let document = Document::new_markdown_curated(text); + let document = Document::new_markdown_curated(text, MarkdownOptions::default()); assert_eq!(document.tokens.len(), final_tok_count); } diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs index e2978c1c..1a89c76d 100644 --- a/harper-core/src/linting/mod.rs +++ b/harper-core/src/linting/mod.rs @@ -62,10 +62,10 @@ pub trait Linter: Send + Sync { #[cfg(test)] mod tests { use super::Linter; - use crate::Document; + use crate::{parsers::MarkdownOptions, Document}; pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) { - let test = Document::new_markdown_curated(text); + let test = Document::new_markdown_curated(text, MarkdownOptions::default()); let lints = linter.lint(&test); dbg!(&lints); assert_eq!(lints.len(), count); @@ -74,7 +74,7 @@ mod tests { /// Runs a provided linter on text, applies the first suggestion from each /// lint and asserts that the result is equal to a given value. pub fn assert_suggestion_result(text: &str, mut linter: impl Linter, expected_result: &str) { - let test = Document::new_markdown_curated(text); + let test = Document::new_markdown_curated(text, MarkdownOptions::default()); let lints = linter.lint(&test); let mut text: Vec = text.chars().collect(); diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index 621b8ddc..2926aa99 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -1,5 +1,7 @@ use std::collections::VecDeque; +use serde::{Deserialize, Serialize}; + use super::{Parser, PlainEnglish}; use crate::{Span, Token, TokenKind, TokenStringExt, VecExt}; @@ -7,15 +9,19 @@ use crate::{Span, Token, TokenKind, TokenStringExt, VecExt}; /// CommonMark files. /// /// Will ignore code blocks and tables. -#[derive(Default)] -pub struct Markdown(pub MarkdownOptions); +#[derive(Default, Clone, Debug)] +pub struct Markdown(MarkdownOptions); -#[derive(Default, Copy, Clone)] +#[derive(Default, Copy, Clone, Debug, Serialize, Deserialize)] pub struct MarkdownOptions { - ignore_link_title: bool, + pub ignore_link_title: bool, } impl Markdown { + pub fn new(options: MarkdownOptions) -> Self { + Self(options) + } + /// Remove hidden Wikilink target text. /// /// As in, the stuff to the left of the pipe operator: diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index 00a80218..4429e054 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -63,7 +63,7 @@ mod tests { } fn assert_tokens_eq_md(test_str: impl AsRef, expected: &[TokenKind]) { - let mut parser = Markdown; + let mut parser = Markdown::default(); assert_tokens_eq(test_str, expected, &mut parser) } diff --git a/harper-core/tests/run_tests.rs b/harper-core/tests/run_tests.rs index 058b039d..332dfa14 100644 --- a/harper-core/tests/run_tests.rs +++ b/harper-core/tests/run_tests.rs @@ -1,4 +1,5 @@ use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; +use harper_core::parsers::MarkdownOptions; use harper_core::{Document, FstDictionary}; /// Creates a unit test checking that the linting of a Markdown document (in @@ -16,7 +17,7 @@ macro_rules! create_test { ); let dict = FstDictionary::curated(); - let document = Document::new_markdown(&source, &dict); + let document = Document::new_markdown(&source, MarkdownOptions::default(), &dict); let mut linter = LintGroup::new( LintGroupConfig::default(), diff --git a/harper-html/tests/run_tests.rs b/harper-html/tests/run_tests.rs index 7d6aec2b..7f39d644 100644 --- a/harper-html/tests/run_tests.rs +++ b/harper-html/tests/run_tests.rs @@ -1,4 +1,5 @@ use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; +use harper_core::parsers::MarkdownOptions; use harper_core::{Document, FstDictionary}; /// Creates a unit test checking that the linting of a Markdown document (in @@ -16,7 +17,7 @@ macro_rules! create_test { ); let dict = FstDictionary::curated(); - let document = Document::new_markdown(&source, &dict); + let document = Document::new_markdown(&source, MarkdownOptions::default(), &dict); let mut linter = LintGroup::new( LintGroupConfig::default(), diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 73ceedeb..6f9156e1 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -44,8 +44,8 @@ impl Backend { pub fn new(client: Client, config: Config) -> Self { Self { client, - doc_state: Mutex::new(HashMap::new()), config: RwLock::new(config), + doc_state: Mutex::new(HashMap::new()), } } @@ -180,41 +180,48 @@ impl Backend { return Ok(()); }; - let parser: Option> = - if let Some(ts_parser) = CommentParser::new_from_language_id(language_id) { - let source: Vec = text.chars().collect(); - let source = Arc::new(source); - - if let Some(new_dict) = ts_parser.create_ident_dict(source.as_slice()) { - let new_dict = Arc::new(new_dict); - - if doc_state.ident_dict != new_dict { - doc_state.ident_dict = new_dict.clone(); - let mut merged = self.generate_file_dictionary(url).await?; - merged.add_dictionary(new_dict); - let merged = Arc::new(merged); - - doc_state.linter = LintGroup::new(config_lock.lint_config, merged.clone()); - doc_state.dict = merged.clone(); - } - Some(Box::new(CollapseIdentifiers::new( - Box::new(ts_parser), - Box::new(doc_state.dict.clone()), - ))) - } else { - Some(Box::new(ts_parser)) + let parser: Option> = if let Some(ts_parser) = + CommentParser::new_from_language_id( + language_id, + Markdown::new(self.config.read().await.markdown_options), + ) { + let source: Vec = text.chars().collect(); + let source = Arc::new(source); + + if let Some(new_dict) = ts_parser.create_ident_dict(source.as_slice()) { + let new_dict = Arc::new(new_dict); + + if doc_state.ident_dict != new_dict { + doc_state.ident_dict = new_dict.clone(); + let mut merged = self.generate_file_dictionary(url).await?; + merged.add_dictionary(new_dict); + let merged = Arc::new(merged); + + doc_state.linter = LintGroup::new(config_lock.lint_config, merged.clone()); + doc_state.dict = merged.clone(); } - } else if language_id == "markdown" { - Some(Box::new(Markdown)) - } else if language_id == "git-commit" { - Some(Box::new(GitCommitParser)) - } else if language_id == "html" { - Some(Box::new(HtmlParser::default())) - } else if language_id == "mail" || language_id == "plaintext" { - Some(Box::new(PlainEnglish)) + Some(Box::new(CollapseIdentifiers::new( + Box::new(ts_parser), + Box::new(doc_state.dict.clone()), + ))) } else { - None - }; + Some(Box::new(ts_parser)) + } + } else if language_id == "markdown" { + Some(Box::new(Markdown::new( + self.config.read().await.markdown_options, + ))) + } else if language_id == "git-commit" { + Some(Box::new(GitCommitParser::new(Markdown::new( + self.config.read().await.markdown_options, + )))) + } else if language_id == "html" { + Some(Box::new(HtmlParser::default())) + } else if language_id == "mail" || language_id == "plaintext" { + Some(Box::new(PlainEnglish)) + } else { + None + }; match parser { None => { diff --git a/harper-ls/src/config.rs b/harper-ls/src/config.rs index 74239f37..5cb9da91 100644 --- a/harper-ls/src/config.rs +++ b/harper-ls/src/config.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use dirs::{config_dir, data_local_dir}; -use harper_core::linting::LintGroupConfig; +use harper_core::{linting::LintGroupConfig, parsers::MarkdownOptions}; use resolve_path::PathResolveExt; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -70,6 +70,7 @@ pub struct Config { pub diagnostic_severity: DiagnosticSeverity, pub code_action_config: CodeActionConfig, pub isolate_english: bool, + pub markdown_options: MarkdownOptions, } impl Config { @@ -124,6 +125,10 @@ impl Config { } } + if let Some(v) = value.get("markdown") { + base.markdown_options = serde_json::from_value(v.clone())?; + } + Ok(base) } } @@ -139,6 +144,7 @@ impl Default for Config { diagnostic_severity: DiagnosticSeverity::Hint, code_action_config: CodeActionConfig::default(), isolate_english: false, + markdown_options: MarkdownOptions::default(), } } } diff --git a/harper-ls/src/git_commit_parser.rs b/harper-ls/src/git_commit_parser.rs index 2b2f7d06..c39ca332 100644 --- a/harper-ls/src/git_commit_parser.rs +++ b/harper-ls/src/git_commit_parser.rs @@ -1,7 +1,16 @@ use harper_core::parsers::{Markdown, Parser}; /// A Harper parser for Git commit files -pub struct GitCommitParser; +#[derive(Clone)] +pub struct GitCommitParser { + markdown_parser: Markdown, +} + +impl GitCommitParser { + pub fn new(markdown_parser: Markdown) -> Self { + Self { markdown_parser } + } +} impl Parser for GitCommitParser { /// Admittedly a somewhat naive implementation. @@ -13,8 +22,6 @@ impl Parser for GitCommitParser { .position(|c| *c == '#') .unwrap_or(source.len()); - let mut md_parser = Markdown; - - md_parser.parse(&source[0..end]) + self.markdown_parser.parse(&source[0..end]) } } diff --git a/harper-wasm/src/lib.rs b/harper-wasm/src/lib.rs index 53eeffc7..0e421e6d 100644 --- a/harper-wasm/src/lib.rs +++ b/harper-wasm/src/lib.rs @@ -67,8 +67,12 @@ pub fn lint(text: String) -> Vec { let source: Vec<_> = text.chars().collect(); let source = Lrc::new(source); - let document = - Document::new_from_vec(source.clone(), &mut Markdown, &FullDictionary::curated()); + // TODO: Have a way to configure the markdown parser + let document = Document::new_from_vec( + source.clone(), + &mut Markdown::default(), + &FullDictionary::curated(), + ); let mut lints = LINTER.lock().unwrap().lint(&document);