diff --git a/Cargo.lock b/Cargo.lock index cea3b045..4ddc4403 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -503,6 +503,7 @@ name = "harper-comments" version = "0.8.8" dependencies = [ "harper-core", + "harper-html", "harper-tree-sitter", "itertools 0.12.1", "paste", @@ -549,6 +550,7 @@ version = "0.8.8" dependencies = [ "harper-core", "harper-tree-sitter", + "paste", "tree-sitter", "tree-sitter-html", ] diff --git a/harper-comments/Cargo.toml b/harper-comments/Cargo.toml index 8ef0fe67..9780cc53 100644 --- a/harper-comments/Cargo.toml +++ b/harper-comments/Cargo.toml @@ -9,6 +9,7 @@ repository = "https://github.com/elijah-potter/harper" [dependencies] harper-core = { path = "../harper-core", version = "0.8.0" } +harper-html = { path = "../harper-html", version = "0.8.0" } harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" } tree-sitter = "0.20.10" tree-sitter-rust = "0.20.4" diff --git a/harper-comments/src/comment_parser.rs b/harper-comments/src/comment_parser.rs index 0f6e17ce..4062d542 100644 --- a/harper-comments/src/comment_parser.rs +++ b/harper-comments/src/comment_parser.rs @@ -1,6 +1,6 @@ use std::path::Path; -use comment_parsers::{Go, JsDoc, Unit}; +use comment_parsers::{Go, JavaDoc, JsDoc, Unit}; use harper_core::parsers::{self, Parser}; use harper_core::{FullDictionary, Token}; use harper_tree_sitter::TreeSitterMasker; @@ -9,7 +9,7 @@ use tree_sitter::Node; use crate::comment_parsers; pub struct CommentParser { - inner: parsers::Mask> + inner: parsers::Mask>, } impl CommentParser { @@ -35,20 +35,21 @@ impl CommentParser { "lua" => tree_sitter_lua::language(), "sh" => tree_sitter_bash::language(), "java" => tree_sitter_java::language(), - _ => return None + _ => return None, }; let comment_parser: Box = match language_id { "javascriptreact" | "typescript" | "typescriptreact" | "javascript" => Box::new(JsDoc), + "java" => Box::new(JavaDoc::default()), "go" => Box::new(Go), - _ => Box::new(Unit) + _ => Box::new(Unit), }; Some(Self { 
inner: parsers::Mask::new( TreeSitterMasker::new(language, Self::node_condition), - comment_parser - ) + comment_parser, + ), }) } @@ -81,7 +82,7 @@ impl CommentParser { "sh" => "sh", "bash" => "sh", "java" => "java", - _ => return None + _ => return None, }) } diff --git a/harper-comments/src/comment_parsers/javadoc.rs b/harper-comments/src/comment_parsers/javadoc.rs new file mode 100644 index 00000000..2a15521a --- /dev/null +++ b/harper-comments/src/comment_parsers/javadoc.rs @@ -0,0 +1,24 @@ +use harper_core::{parsers::Parser, Token}; +use harper_html::HtmlParser; + +use super::without_initiators; + +#[derive(Default)] +pub struct JavaDoc { + html_parser: HtmlParser, +} + +impl Parser for JavaDoc { + fn parse(&mut self, source: &[char]) -> Vec { + let actual = without_initiators(source); + let actual_source = actual.get_content(source); + + let mut tokens = self.html_parser.parse(actual_source); + + for token in tokens.iter_mut() { + token.span.push_by(actual.start); + } + + tokens + } +} diff --git a/harper-comments/src/comment_parsers/jsdoc.rs b/harper-comments/src/comment_parsers/jsdoc.rs index 7512493b..3e9b4213 100644 --- a/harper-comments/src/comment_parsers/jsdoc.rs +++ b/harper-comments/src/comment_parsers/jsdoc.rs @@ -17,7 +17,7 @@ impl Parser for JsDoc { new_tokens.push(Token::new( Span::new_with_len(line.len(), 1), - harper_core::TokenKind::Newline(1) + harper_core::TokenKind::Newline(1), )); new_tokens @@ -204,7 +204,7 @@ mod tests { TokenKind::Unlintable, TokenKind::Unlintable, TokenKind::Punctuation(Punctuation::Period), - TokenKind::Newline(2), + TokenKind::Newline(1), ] ); } @@ -217,6 +217,6 @@ mod tests { assert!(document .tokens() - .all(|t| t.kind.is_unlintable() || t.kind.is_newline())); + .all(|t| t.kind.is_unlintable() || t.kind.is_newline() || t.kind.is_paragraph_break())); } } diff --git a/harper-comments/src/comment_parsers/mod.rs b/harper-comments/src/comment_parsers/mod.rs index 3d1aec7e..c668e968 100644 --- 
a/harper-comments/src/comment_parsers/mod.rs +++ b/harper-comments/src/comment_parsers/mod.rs @@ -1,9 +1,11 @@ mod go; +mod javadoc; mod jsdoc; mod unit; pub use go::Go; use harper_core::Span; +pub use javadoc::JavaDoc; pub use jsdoc::JsDoc; pub use unit::Unit; diff --git a/harper-comments/src/comment_parsers/unit.rs b/harper-comments/src/comment_parsers/unit.rs index 2cd50540..07eb144f 100644 --- a/harper-comments/src/comment_parsers/unit.rs +++ b/harper-comments/src/comment_parsers/unit.rs @@ -22,7 +22,7 @@ impl Parser for Unit { new_tokens.push(Token::new( Span::new_with_len(line.len(), 1), - harper_core::TokenKind::Newline(1) + harper_core::TokenKind::Newline(1), )); new_tokens diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 3ab9d619..1865c48b 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -15,7 +15,7 @@ use crate::{FatToken, Lrc, Token, TokenKind, TokenStringExt}; pub struct Document { source: Lrc>, tokens: Vec, - parser: Box + parser: Box, } impl Default for Document { @@ -37,7 +37,7 @@ impl Document { let mut doc = Self { source, tokens: Vec::new(), - parser + parser, }; doc.parse(); @@ -59,11 +59,23 @@ impl Document { self.tokens = self.parser.parse(&self.source); self.condense_spaces(); self.condense_newlines(); + self.newlines_to_breaks(); self.condense_contractions(); self.condense_number_suffixes(); self.match_quotes(); } + /// Convert every newline token that represents two or more newlines into a paragraph break. + fn newlines_to_breaks(&mut self) { + for token in &mut self.tokens { + if let TokenKind::Newline(n) = token.kind { + if n >= 2 { + token.kind = TokenKind::ParagraphBreak; + } + } + } + } + /// Given a list of indices, this function removes the subsequent /// `stretch_len - 1` elements after each index. /// @@ -102,7 +114,7 @@ impl Document { &old[indices .last() .map(|v| v + stretch_len) - .unwrap_or(indices.len())..] 
+ .unwrap_or(indices.len())..], ); } @@ -242,7 +254,7 @@ impl Document { pub fn get_full_string(&self) -> String { self.get_span_content_str(Span { start: 0, - end: self.source.len() + end: self.source.len(), }) } @@ -502,7 +514,7 @@ fn is_chunk_terminator(token: &TokenKind) -> bool { match token { TokenKind::Punctuation(punct) => [Punctuation::Comma].contains(punct), - _ => false + _ => false, } } @@ -511,11 +523,11 @@ fn is_sentence_terminator(token: &TokenKind) -> bool { TokenKind::Punctuation(punct) => [ Punctuation::Period, Punctuation::Bang, - Punctuation::Question + Punctuation::Question, ] .contains(punct), - TokenKind::Newline(count) => *count >= 2, - _ => false + TokenKind::ParagraphBreak => true, + _ => false, } } @@ -636,7 +648,7 @@ mod tests { assert_token_count("This is the 3rd test", 9); assert_token_count( "It works even with weird capitalization like this: 600nD", - 18 + 18, ); } diff --git a/harper-core/src/parsers/mask.rs b/harper-core/src/parsers/mask.rs index 65dca812..2bd21aae 100644 --- a/harper-core/src/parsers/mask.rs +++ b/harper-core/src/parsers/mask.rs @@ -1,21 +1,21 @@ use super::Parser; use crate::mask::Masker; -use crate::{Token, TokenKind}; +use crate::{Span, Token, TokenKind}; /// Composes a Masker and a Parser to parse only masked chunks of text. pub struct Mask where M: Masker, - P: Parser + P: Parser, { pub masker: M, - pub parser: P + pub parser: P, } impl Mask where M: Masker, - P: Parser + P: Parser, { pub fn new(masker: M, parser: P) -> Self { Self { masker, parser } @@ -25,29 +25,33 @@ where impl Parser for Mask where M: Masker, - P: Parser + P: Parser, { fn parse(&mut self, source: &[char]) -> Vec { let mask = self.masker.create_mask(source); - let mut tokens = Vec::new(); + let mut tokens: Vec = Vec::new(); + + let mut last_allowed: Option = None; for (span, content) in mask.iter_allowed(source) { - let new_tokens = &mut self.parser.parse(content); + // Check if there was a line break between the last chunk. 
+ if let Some(last_allowed) = last_allowed { + let intervening = Span::new(last_allowed.end, span.start); - if let Some(last) = new_tokens.last_mut() { - if let TokenKind::Newline(n) = &mut last.kind { - if *n == 1 { - *n = 2; - } + if intervening.get_content(source).contains(&'\n') { + tokens.push(Token::new(intervening, TokenKind::ParagraphBreak)) } } + let new_tokens = &mut self.parser.parse(content); + for token in new_tokens.iter_mut() { token.span.push_by(span.start); } tokens.append(new_tokens); + last_allowed = Some(span); } tokens diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 6e24e30e..13b62ca6 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -10,7 +10,7 @@ use crate::Quote; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)] pub struct Token { pub span: Span, - pub kind: TokenKind + pub kind: TokenKind, } impl Token { @@ -24,7 +24,7 @@ impl Token { FatToken { content, - kind: self.kind + kind: self.kind, } } } @@ -34,7 +34,7 @@ impl Token { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)] pub struct FatToken { pub content: Vec, - pub kind: TokenKind + pub kind: TokenKind, } #[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Default, PartialOrd)] @@ -53,7 +53,8 @@ pub enum TokenKind { Hostname, /// A special token used for things like inline code blocks that should be /// ignored by all linters. 
- Unlintable + Unlintable, + ParagraphBreak, } #[derive(Debug, Serialize, Deserialize, Default, PartialEq, PartialOrd, Clone, Copy, Is)] @@ -62,7 +63,7 @@ pub enum NumberSuffix { Th, St, Nd, - Rd + Rd, } impl NumberSuffix { @@ -88,7 +89,7 @@ impl NumberSuffix { 7 => Some(Self::Th), 8 => Some(Self::Th), 9 => Some(Self::Th), - _ => None + _ => None, } } @@ -97,7 +98,7 @@ impl NumberSuffix { NumberSuffix::Th => vec!['t', 'h'], NumberSuffix::St => vec!['s', 't'], NumberSuffix::Nd => vec!['n', 'd'], - NumberSuffix::Rd => vec!['r', 'd'] + NumberSuffix::Rd => vec!['r', 'd'], } } @@ -125,7 +126,7 @@ impl NumberSuffix { ('R', 'd') => Some(NumberSuffix::Rd), ('r', 'D') => Some(NumberSuffix::Rd), ('R', 'D') => Some(NumberSuffix::Rd), - _ => None + _ => None, } } } diff --git a/harper-html/Cargo.toml b/harper-html/Cargo.toml index 3ef901c1..058fb31b 100644 --- a/harper-html/Cargo.toml +++ b/harper-html/Cargo.toml @@ -12,3 +12,6 @@ harper-core = { path = "../harper-core", version = "0.8.0" } harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" } tree-sitter-html = "0.19.0" tree-sitter = "0.20.10" + +[dev-dependencies] +paste = "1.0.15" diff --git a/harper-html/src/lib.rs b/harper-html/src/lib.rs index 47cf61ee..b4aefcf9 100644 --- a/harper-html/src/lib.rs +++ b/harper-html/src/lib.rs @@ -4,22 +4,24 @@ use harper_tree_sitter::TreeSitterMasker; use tree_sitter::Node; pub struct HtmlParser { - inner: parsers::Mask + inner: parsers::Mask, } impl HtmlParser { - pub fn new() -> Self { + fn node_condition(n: &Node) -> bool { + n.kind() == "text" + } +} + +impl Default for HtmlParser { + fn default() -> Self { Self { inner: parsers::Mask::new( TreeSitterMasker::new(tree_sitter_html::language(), Self::node_condition), - PlainEnglish - ) + PlainEnglish, + ), } } - - fn node_condition(n: &Node) -> bool { - n.kind() == "text" - } } impl Parser for HtmlParser { diff --git a/harper-html/tests/run_tests.rs b/harper-html/tests/run_tests.rs new file mode 100644 index 
00000000..f2e2d1ce --- /dev/null +++ b/harper-html/tests/run_tests.rs @@ -0,0 +1,32 @@ +use harper_core::{Document, FullDictionary, LintGroup, LintGroupConfig, Linter}; + +/// Creates a unit test checking that the linting of an HTML document (in +/// `test_sources`) produces the expected number of lints. +macro_rules! create_test { + ($filename:ident.html, $correct_expected:expr) => { + paste::paste! { + #[test] + fn [](){ + let source = include_str!( + concat!( + "./test_sources/", + concat!(stringify!($filename), ".html") + ) + ); + + let document = Document::new_markdown(&source); + + let mut linter = LintGroup::new( + LintGroupConfig::default(), + FullDictionary::create_from_curated() + ); + let lints = linter.lint(&document); + + dbg!(&lints); + assert_eq!(lints.len(), $correct_expected); + } + } + }; +} + +create_test!(run_on.html, 0); diff --git a/harper-html/tests/test_sources/run_on.html b/harper-html/tests/test_sources/run_on.html new file mode 100644 index 00000000..f6fd5aef --- /dev/null +++ b/harper-html/tests/test_sources/run_on.html @@ -0,0 +1,36 @@ + + + + + + +

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+

Here is a paragraph

+ + + + + + diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 2e176b43..507ac965 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -5,14 +5,7 @@ use std::sync::Arc; use harper_comments::CommentParser; use harper_core::parsers::{Markdown, PlainEnglish}; use harper_core::{ - Dictionary, - Document, - FullDictionary, - LintGroup, - Linter, - MergedDictionary, - Token, - TokenKind + Dictionary, Document, FullDictionary, LintGroup, Linter, MergedDictionary, Token, TokenKind, }; use harper_html::HtmlParser; use serde_json::Value; @@ -20,31 +13,13 @@ use tokio::sync::{Mutex, RwLock}; use tower_lsp::jsonrpc::Result; use tower_lsp::lsp_types::notification::PublishDiagnostics; use tower_lsp::lsp_types::{ - CodeActionOrCommand, - CodeActionParams, - CodeActionProviderCapability, - CodeActionResponse, - Command, - Diagnostic, - DidChangeConfigurationParams, - DidChangeTextDocumentParams, - DidCloseTextDocumentParams, - DidOpenTextDocumentParams, - DidSaveTextDocumentParams, - ExecuteCommandOptions, - ExecuteCommandParams, - InitializeParams, - InitializeResult, - InitializedParams, - MessageType, - PublishDiagnosticsParams, - Range, - ServerCapabilities, - TextDocumentSyncCapability, - TextDocumentSyncKind, - TextDocumentSyncOptions, - TextDocumentSyncSaveOptions, - Url + CodeActionOrCommand, CodeActionParams, CodeActionProviderCapability, CodeActionResponse, + Command, Diagnostic, DidChangeConfigurationParams, DidChangeTextDocumentParams, + DidCloseTextDocumentParams, DidOpenTextDocumentParams, DidSaveTextDocumentParams, + ExecuteCommandOptions, ExecuteCommandParams, InitializeParams, InitializeResult, + InitializedParams, MessageType, PublishDiagnosticsParams, Range, ServerCapabilities, + TextDocumentSyncCapability, TextDocumentSyncKind, TextDocumentSyncOptions, + TextDocumentSyncSaveOptions, Url, }; use tower_lsp::{Client, LanguageServer}; use tracing::{error, info, instrument}; @@ -60,7 +35,7 @@ struct DocumentState { document: 
Document, ident_dict: Arc, linter: LintGroup>, - language_id: Option + language_id: Option, } /// Deallocate @@ -68,7 +43,7 @@ pub struct Backend { client: Client, static_dictionary: Arc, config: RwLock, - doc_state: Mutex> + doc_state: Mutex>, } impl Backend { @@ -79,7 +54,7 @@ impl Backend { client, static_dictionary: dictionary.into(), doc_state: Mutex::new(HashMap::new()), - config: RwLock::new(config) + config: RwLock::new(config), } } @@ -174,7 +149,7 @@ impl Backend { #[instrument(skip(self))] async fn generate_file_dictionary( &self, - url: &Url + url: &Url, ) -> anyhow::Result> { let (global_dictionary, file_dictionary) = tokio::join!( self.generate_global_dictionary(), @@ -191,11 +166,11 @@ impl Backend { async fn update_document_from_file( &self, url: &Url, - language_id: Option<&str> + language_id: Option<&str>, ) -> anyhow::Result<()> { let content = match tokio::fs::read_to_string( url.to_file_path() - .map_err(|_| anyhow::format_err!("Could not extract file path."))? + .map_err(|_| anyhow::format_err!("Could not extract file path."))?, ) .await { @@ -214,7 +189,7 @@ impl Backend { &self, url: &Url, text: &str, - language_id: Option<&str> + language_id: Option<&str>, ) -> anyhow::Result<()> { let mut doc_lock = self.doc_state.lock().await; let config_lock = self.config.read().await; @@ -226,7 +201,7 @@ impl Backend { let mut doc_state = DocumentState { linter: LintGroup::new( config_lock.lint_config, - self.generate_file_dictionary(url).await? 
+ self.generate_file_dictionary(url).await?, ), language_id: language_id .map(|v| v.to_string()) @@ -262,7 +237,7 @@ impl Backend { } else if language_id == "gitcommit" { Document::new(text, Box::new(GitCommitParser)) } else if language_id == "html" { - Document::new(text, Box::new(HtmlParser::new())) + Document::new(text, Box::new(HtmlParser::default())) } else if language_id == "mail" { Document::new(text, Box::new(PlainEnglish)) } else { @@ -279,7 +254,7 @@ impl Backend { async fn generate_code_actions( &self, url: &Url, - range: Range + range: Range, ) -> Result> { let (config, mut doc_states) = tokio::join!(self.config.read(), self.doc_state.lock()); let Some(doc_state) = doc_states.get_mut(url) else { @@ -311,7 +286,7 @@ impl Backend { actions.push(CodeActionOrCommand::Command(Command::new( "Open URL".to_string(), "HarperOpen".to_string(), - Some(vec![doc_state.document.get_span_content_str(span).into()]) + Some(vec![doc_state.document.get_span_content_str(span).into()]), ))) } @@ -331,7 +306,7 @@ impl Backend { lints_to_diagnostics( doc_state.document.get_full_content(), &lints, - config.diagnostic_severity + config.diagnostic_severity, ) } @@ -342,7 +317,7 @@ impl Backend { let result = PublishDiagnosticsParams { uri: url.clone(), diagnostics, - version: None + version: None, }; self.client @@ -372,11 +347,11 @@ impl LanguageServer for Backend { change: Some(TextDocumentSyncKind::FULL), will_save: None, will_save_wait_until: None, - save: Some(TextDocumentSyncSaveOptions::Supported(true)) - } + save: Some(TextDocumentSyncSaveOptions::Supported(true)), + }, )), ..Default::default() - } + }, }) } @@ -407,7 +382,7 @@ impl LanguageServer for Backend { .update_document( ¶ms.text_document.uri, ¶ms.text_document.text, - Some(¶ms.text_document.language_id) + Some(¶ms.text_document.language_id), ) .await; @@ -496,7 +471,7 @@ impl LanguageServer for Backend { error!("Unable to open URL: {}", err); } }, - _ => () + _ => (), } Ok(None) @@ -527,7 +502,7 @@ impl 
LanguageServer for Backend { // Update documents with new config futures::future::join_all( keys.iter() - .map(|key| self.update_document_from_file(key, None)) + .map(|key| self.update_document_from_file(key, None)), ) .await; } diff --git a/justfile b/justfile index 352e7323..f524c62b 100644 --- a/justfile +++ b/justfile @@ -86,3 +86,7 @@ dogfood: test: cargo test cargo test --release + +parse file: + cargo run --bin harper-cli -- parse {{file}} +