Skip to content

Commit

Permalink
feat: fixed issues with HTML parser + added JavaDoc parser
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jul 19, 2024
1 parent d596a5f commit ad78ca2
Show file tree
Hide file tree
Showing 16 changed files with 199 additions and 100 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions harper-comments/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ repository = "https://github.com/elijah-potter/harper"

[dependencies]
harper-core = { path = "../harper-core", version = "0.8.0" }
harper-html = { path = "../harper-html", version = "0.8.0" }
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }
tree-sitter = "0.20.10"
tree-sitter-rust = "0.20.4"
Expand Down
15 changes: 8 additions & 7 deletions harper-comments/src/comment_parser.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::path::Path;

use comment_parsers::{Go, JsDoc, Unit};
use comment_parsers::{Go, JavaDoc, JsDoc, Unit};
use harper_core::parsers::{self, Parser};
use harper_core::{FullDictionary, Token};
use harper_tree_sitter::TreeSitterMasker;
Expand All @@ -9,7 +9,7 @@ use tree_sitter::Node;
use crate::comment_parsers;

pub struct CommentParser {
inner: parsers::Mask<TreeSitterMasker, Box<dyn Parser>>
inner: parsers::Mask<TreeSitterMasker, Box<dyn Parser>>,
}

impl CommentParser {
Expand All @@ -35,20 +35,21 @@ impl CommentParser {
"lua" => tree_sitter_lua::language(),
"sh" => tree_sitter_bash::language(),
"java" => tree_sitter_java::language(),
_ => return None
_ => return None,
};

let comment_parser: Box<dyn Parser> = match language_id {
"javascriptreact" | "typescript" | "typescriptreact" | "javascript" => Box::new(JsDoc),
"java" => Box::new(JavaDoc::default()),
"go" => Box::new(Go),
_ => Box::new(Unit)
_ => Box::new(Unit),
};

Some(Self {
inner: parsers::Mask::new(
TreeSitterMasker::new(language, Self::node_condition),
comment_parser
)
comment_parser,
),
})
}

Expand Down Expand Up @@ -81,7 +82,7 @@ impl CommentParser {
"sh" => "sh",
"bash" => "sh",
"java" => "java",
_ => return None
_ => return None,
})
}

Expand Down
24 changes: 24 additions & 0 deletions harper-comments/src/comment_parsers/javadoc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use harper_core::{parsers::Parser, Token};
use harper_html::HtmlParser;

use super::without_initiators;

/// Comment parser used for Java sources.
///
/// Holds an [`HtmlParser`] that the `Parser` implementation for this type
/// delegates the comment text to.
#[derive(Default)]
pub struct JavaDoc {
/// Inner parser that receives the comment text.
html_parser: HtmlParser,
}

impl Parser for JavaDoc {
    /// Tokenize a single comment.
    ///
    /// The span returned by `without_initiators` selects the part of `source`
    /// handed to the inner HTML parser (presumably the comment text with its
    /// leading comment markers removed -- confirm against `without_initiators`).
    /// Every resulting token span is then shifted by that span's start.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        let body = without_initiators(source);
        let offset = body.start;

        let mut tokens = self.html_parser.parse(body.get_content(source));

        // The inner parser only saw the sub-slice, so move each span forward
        // by the sub-slice's starting position within `source`.
        for token in &mut tokens {
            token.span.push_by(offset);
        }

        tokens
    }
}
6 changes: 3 additions & 3 deletions harper-comments/src/comment_parsers/jsdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl Parser for JsDoc {

new_tokens.push(Token::new(
Span::new_with_len(line.len(), 1),
harper_core::TokenKind::Newline(1)
harper_core::TokenKind::Newline(1),
));

new_tokens
Expand Down Expand Up @@ -204,7 +204,7 @@ mod tests {
TokenKind::Unlintable,
TokenKind::Unlintable,
TokenKind::Punctuation(Punctuation::Period),
TokenKind::Newline(2),
TokenKind::Newline(1),
]
);
}
Expand All @@ -217,6 +217,6 @@ mod tests {

assert!(document
.tokens()
.all(|t| t.kind.is_unlintable() || t.kind.is_newline()));
.all(|t| t.kind.is_unlintable() || t.kind.is_newline() || t.kind.is_paragraph_break()));
}
}
2 changes: 2 additions & 0 deletions harper-comments/src/comment_parsers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod go;
mod javadoc;
mod jsdoc;
mod unit;

pub use go::Go;
use harper_core::Span;
pub use javadoc::JavaDoc;
pub use jsdoc::JsDoc;
pub use unit::Unit;

Expand Down
2 changes: 1 addition & 1 deletion harper-comments/src/comment_parsers/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ impl Parser for Unit {

new_tokens.push(Token::new(
Span::new_with_len(line.len(), 1),
harper_core::TokenKind::Newline(1)
harper_core::TokenKind::Newline(1),
));

new_tokens
Expand Down
30 changes: 21 additions & 9 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{FatToken, Lrc, Token, TokenKind, TokenStringExt};
pub struct Document {
source: Lrc<Vec<char>>,
tokens: Vec<Token>,
parser: Box<dyn Parser>
parser: Box<dyn Parser>,
}

impl Default for Document {
Expand All @@ -37,7 +37,7 @@ impl Document {
let mut doc = Self {
source,
tokens: Vec::new(),
parser
parser,
};
doc.parse();

Expand All @@ -59,11 +59,23 @@ impl Document {
self.tokens = self.parser.parse(&self.source);
self.condense_spaces();
self.condense_newlines();
self.newlines_to_breaks();
self.condense_contractions();
self.condense_number_suffixes();
self.match_quotes();
}

/// Replace every `Newline` token whose count is two or more with a
/// `ParagraphBreak` token. Tokens of any other kind, and single newlines,
/// are left untouched.
fn newlines_to_breaks(&mut self) {
    for token in self.tokens.iter_mut() {
        if matches!(token.kind, TokenKind::Newline(count) if count >= 2) {
            token.kind = TokenKind::ParagraphBreak;
        }
    }
}

/// Given a list of indices, this function removes the subsequent
/// `stretch_len - 1` elements after each index.
///
Expand Down Expand Up @@ -102,7 +114,7 @@ impl Document {
&old[indices
.last()
.map(|v| v + stretch_len)
.unwrap_or(indices.len())..]
.unwrap_or(indices.len())..],
);
}

Expand Down Expand Up @@ -242,7 +254,7 @@ impl Document {
pub fn get_full_string(&self) -> String {
self.get_span_content_str(Span {
start: 0,
end: self.source.len()
end: self.source.len(),
})
}

Expand Down Expand Up @@ -502,7 +514,7 @@ fn is_chunk_terminator(token: &TokenKind) -> bool {

match token {
TokenKind::Punctuation(punct) => [Punctuation::Comma].contains(punct),
_ => false
_ => false,
}
}

Expand All @@ -511,11 +523,11 @@ fn is_sentence_terminator(token: &TokenKind) -> bool {
TokenKind::Punctuation(punct) => [
Punctuation::Period,
Punctuation::Bang,
Punctuation::Question
Punctuation::Question,
]
.contains(punct),
TokenKind::Newline(count) => *count >= 2,
_ => false
TokenKind::ParagraphBreak => true,
_ => false,
}
}

Expand Down Expand Up @@ -636,7 +648,7 @@ mod tests {
assert_token_count("This is the 3rd test", 9);
assert_token_count(
"It works even with weird capitalization like this: 600nD",
18
18,
);
}

Expand Down
28 changes: 16 additions & 12 deletions harper-core/src/parsers/mask.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
use super::Parser;
use crate::mask::Masker;
use crate::{Token, TokenKind};
use crate::{Span, Token, TokenKind};

/// Composes a Masker and a Parser to parse only masked chunks of text.
pub struct Mask<M, P>
where
M: Masker,
P: Parser
P: Parser,
{
pub masker: M,
pub parser: P
pub parser: P,
}

impl<M, P> Mask<M, P>
where
M: Masker,
P: Parser
P: Parser,
{
pub fn new(masker: M, parser: P) -> Self {
Self { masker, parser }
Expand All @@ -25,29 +25,33 @@ where
impl<M, P> Parser for Mask<M, P>
where
M: Masker,
P: Parser
P: Parser,
{
fn parse(&mut self, source: &[char]) -> Vec<Token> {
let mask = self.masker.create_mask(source);

let mut tokens = Vec::new();
let mut tokens: Vec<Token> = Vec::new();

let mut last_allowed: Option<Span> = None;

for (span, content) in mask.iter_allowed(source) {
let new_tokens = &mut self.parser.parse(content);
// Check whether there was a line break between the previous allowed chunk and this one.
if let Some(last_allowed) = last_allowed {
let intervening = Span::new(last_allowed.end, span.start);

if let Some(last) = new_tokens.last_mut() {
if let TokenKind::Newline(n) = &mut last.kind {
if *n == 1 {
*n = 2;
}
if intervening.get_content(source).contains(&'\n') {
tokens.push(Token::new(intervening, TokenKind::ParagraphBreak))
}
}

let new_tokens = &mut self.parser.parse(content);

for token in new_tokens.iter_mut() {
token.span.push_by(span.start);
}

tokens.append(new_tokens);
last_allowed = Some(span);
}

tokens
Expand Down
17 changes: 9 additions & 8 deletions harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::Quote;
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)]
pub struct Token {
pub span: Span,
pub kind: TokenKind
pub kind: TokenKind,
}

impl Token {
Expand All @@ -24,7 +24,7 @@ impl Token {

FatToken {
content,
kind: self.kind
kind: self.kind,
}
}
}
Expand All @@ -34,7 +34,7 @@ impl Token {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd)]
pub struct FatToken {
pub content: Vec<char>,
pub kind: TokenKind
pub kind: TokenKind,
}

#[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Default, PartialOrd)]
Expand All @@ -53,7 +53,8 @@ pub enum TokenKind {
Hostname,
/// A special token used for things like inline code blocks that should be
/// ignored by all linters.
Unlintable
Unlintable,
ParagraphBreak,
}

#[derive(Debug, Serialize, Deserialize, Default, PartialEq, PartialOrd, Clone, Copy, Is)]
Expand All @@ -62,7 +63,7 @@ pub enum NumberSuffix {
Th,
St,
Nd,
Rd
Rd,
}

impl NumberSuffix {
Expand All @@ -88,7 +89,7 @@ impl NumberSuffix {
7 => Some(Self::Th),
8 => Some(Self::Th),
9 => Some(Self::Th),
_ => None
_ => None,
}
}

Expand All @@ -97,7 +98,7 @@ impl NumberSuffix {
NumberSuffix::Th => vec!['t', 'h'],
NumberSuffix::St => vec!['s', 't'],
NumberSuffix::Nd => vec!['n', 'd'],
NumberSuffix::Rd => vec!['r', 'd']
NumberSuffix::Rd => vec!['r', 'd'],
}
}

Expand Down Expand Up @@ -125,7 +126,7 @@ impl NumberSuffix {
('R', 'd') => Some(NumberSuffix::Rd),
('r', 'D') => Some(NumberSuffix::Rd),
('R', 'D') => Some(NumberSuffix::Rd),
_ => None
_ => None,
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions harper-html/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ harper-core = { path = "../harper-core", version = "0.8.0" }
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.8.0" }
tree-sitter-html = "0.19.0"
tree-sitter = "0.20.10"

[dev-dependencies]
paste = "1.0.15"
Loading

0 comments on commit ad78ca2

Please sign in to comment.