Skip to content

Commit

Permalink
code block selectors
Browse files Browse the repository at this point in the history
Add code block selectors:

~~~
```           # all code blocks
```rust       # code blocks for rust
``` foo       # code blocks whose contents contain "foo"
```rust foo   # combined
~~~

Resolves #142
  • Loading branch information
yshavit authored Jul 21, 2024
1 parent 28ccd47 commit 4df7dea
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 18 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ You can select...
$ cat example.md | mdq '> foo' # find block quotes containing "foo"
```

- Code blocks:

```bash
$ cat example.md | mdq '```rust fizz' # find code blocks for rust with "fizz" within them
```

The `foo`s and `bar`s above can be:

- an `unquoted string` that starts with a letter, as shown above
Expand Down
67 changes: 58 additions & 9 deletions src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ impl StringMatcher {
}
}

pub fn read(chars: &mut ParsingIterator, bareword_end: char) -> ParseResult<Self> {
pub fn read<C>(chars: &mut ParsingIterator, bareword_end: C) -> ParseResult<Self>
where
C: Into<CharEnd>,
{
chars.drop_whitespace();
let peek_ch = match chars.peek() {
None => return Ok(StringMatcher::any()),
Expand All @@ -78,6 +81,7 @@ impl StringMatcher {
if peek_ch.is_alphanumeric() {
return Ok(Self::parse_matcher_bare(chars, bareword_end, anchor_start));
}
let bareword_end = bareword_end.into();
match peek_ch {
'*' => {
let _ = chars.next(); // drop the char we peeked
Expand All @@ -102,7 +106,7 @@ impl StringMatcher {
))
}
}
other if other == bareword_end => {
other if bareword_end.test(other) => {
// do *not* consume the bareword end delimiter!
if anchor_start {
Err(ParseErrorReason::InvalidSyntax(
Expand All @@ -118,22 +122,38 @@ impl StringMatcher {
}
}

fn parse_matcher_bare(chars: &mut ParsingIterator, bareword_end: char, anchor_start: bool) -> Self {
fn parse_matcher_bare<C>(chars: &mut ParsingIterator, bareword_end: C, anchor_start: bool) -> Self
where
C: Into<CharEnd>,
{
let mut result = String::with_capacity(20); // just a guess
let mut dropped = String::with_capacity(8); // also a guess

let bareword_end = bareword_end.into();
let anchor_end = loop {
// Drop whitespace, but keep a record of it. If we see a char within this bareword (ie not end-of-input or
// the bareword_end), then we'll append that whitespace back.
chars.drop_to_while(&mut dropped, |ch| ch.is_whitespace());
let Some(ch) = chars.peek() else {
let ch = match bareword_end {
CharEnd::AtChar(_) => {
// Drop whitespace, but keep a record of it. If we see a char within this bareword
// (ie not end-of-input or the bareword_end), then we'll append that whitespace back at the end
// of this iteration.
chars.drop_to_while(&mut dropped, |ch| ch.is_whitespace());
chars.peek()
}
CharEnd::AtWhitespace => match chars.peek() {
None => None,
Some(ch) if ch.is_whitespace() => None,
ch @ Some(_) => ch,
},
};
let Some(ch) = ch else {
break false;
};

if ch == Self::BAREWORD_ANCHOR_END {
let _ = chars.next();
break true;
}
if ch == bareword_end {
if bareword_end.test(ch) {
break false;
}
let _ = chars.next();
Expand Down Expand Up @@ -223,7 +243,7 @@ impl StringMatcher {
}
}

fn any() -> Self {
pub fn any() -> Self {
Self {
re: Regex::new(".*").expect("internal error"),
}
Expand All @@ -240,6 +260,26 @@ impl StringMatcher {
}
}

/// Describes which character terminates a bareword while a `StringMatcher` is being parsed.
///
/// A plain `char` converts into [`CharEnd::AtChar`] through `From<char>`, so call sites that
/// pass a single delimiter character keep working unchanged.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharEnd {
    /// The bareword stops when this exact character is seen.
    AtChar(char),
    /// The bareword stops at the first whitespace character.
    AtWhitespace,
}

impl From<char> for CharEnd {
    /// Wraps a delimiter character as [`CharEnd::AtChar`].
    fn from(ch: char) -> Self {
        Self::AtChar(ch)
    }
}

impl CharEnd {
    /// Returns `true` when `test_ch` is a terminator under this rule.
    fn test(&self, test_ch: char) -> bool {
        match self {
            CharEnd::AtChar(look_for) => *look_for == test_ch,
            CharEnd::AtWhitespace => test_ch.is_whitespace(),
        }
    }
}

struct SubstringToRegex {
look_for: String,
case_sensitive: bool,
Expand Down Expand Up @@ -273,6 +313,7 @@ mod test {
use crate::parse_common::Position;
use crate::select::SELECTOR_SEPARATOR;
use indoc::indoc;
use std::str::FromStr;

#[test]
fn bareword() {
Expand Down Expand Up @@ -579,4 +620,12 @@ mod test {
s.push_str(value);
re(&s)
}

// Test-only shortcut: build a matcher straight from a regex pattern.
// Panics if the pattern is not a valid regex.
impl From<&str> for StringMatcher {
    fn from(value: &str) -> Self {
        let re = Regex::from_str(value).unwrap();
        Self { re }
    }
}
}
53 changes: 44 additions & 9 deletions src/select/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::parse_common::Position;
use crate::parsing_iter::ParsingIterator;
use crate::select::base::Selector;
use crate::select::sel_block_quote::BlockQuoteSelector;
use crate::select::sel_code_block::CodeBlockSelector;
use crate::select::sel_image::ImageSelector;
use crate::select::sel_link::LinkSelector;
use crate::select::sel_list_item::ListItemSelector;
Expand Down Expand Up @@ -132,7 +133,9 @@ selectors![
{'['} Link,
! {'['} Image,

{'>'} BlockQuote
{'>'} BlockQuote,

{'`'} CodeBlock,
];

impl MdqRefSelector {
Expand Down Expand Up @@ -255,14 +258,14 @@ mod test {

mod single_selector_parse {
use super::*;
use crate::select::ParseErrorReason::UnexpectedCharacter;
use crate::variants_checker;

#[test]
fn section() {
let input = "#";
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
let section_sel_parsed = SectionSelector::read(&mut ParsingIterator::new(&input[1..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::Section(section_sel_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::Section(section_sel_parsed));
}

#[test]
Expand All @@ -271,7 +274,7 @@ mod test {
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
let item_parsed =
ListItemSelector::read(ListItemType::Ordered, &mut ParsingIterator::new(&input[1..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::ListItem(item_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::ListItem(item_parsed));
}

#[test]
Expand All @@ -280,15 +283,15 @@ mod test {
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
let item_parsed =
ListItemSelector::read(ListItemType::Unordered, &mut ParsingIterator::new(&input[1..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::ListItem(item_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::ListItem(item_parsed));
}

#[test]
fn link() {
let input = "[]()";
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
let item_parsed = LinkSelector::read(&mut ParsingIterator::new(&input[1..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::Link(item_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::Link(item_parsed));
}

#[test]
Expand All @@ -297,23 +300,55 @@ mod test {
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
// note: input[2..] because parse_selector reads both the '!' and the '['
let item_parsed = ImageSelector::read(&mut ParsingIterator::new(&input[2..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::Image(item_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::Image(item_parsed));
}

#[test]
fn block_quote() {
let input = ">";
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
let item_parsed = BlockQuoteSelector::read(&mut ParsingIterator::new(&input[1..])).unwrap();
assert_eq!(mdq_ref_sel_parsed, Ok(MdqRefSelector::BlockQuote(item_parsed)));
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::BlockQuote(item_parsed));
}

#[test]
fn code_block() {
    let input = "```";
    // parse_selector consumes the first backtick itself, so the selector is read from input[1..]
    let expected = CodeBlockSelector::read(&mut ParsingIterator::new(&input[1..])).unwrap();
    let actual = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
    expect_ok(actual, MdqRefSelector::CodeBlock(expected));
}

#[test]
fn code_block_only_two_backticks() {
    // Two backticks are one short of a code block fence, so parsing must ask for another one.
    let mut chars = ParsingIterator::new("``");
    let parsed = MdqRefSelector::parse_selector(&mut chars);
    assert_eq!(parsed, Err(ParseErrorReason::Expected('`')));
}

#[test]
fn unknown() {
let input = "\u{2603}";
let mdq_ref_sel_parsed = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));
assert_eq!(mdq_ref_sel_parsed, Err(UnexpectedCharacter('\u{2603}')));
assert_eq!(
mdq_ref_sel_parsed,
Err(ParseErrorReason::UnexpectedCharacter('\u{2603}'))
);
}

/// Asserts that `actual` parsed successfully to `expected`.
///
/// A successful parse is reported to `CHECKER` before the assertion runs.
fn expect_ok(actual: ParseResult<MdqRefSelector>, expected: MdqRefSelector) {
    if let Ok(selector) = &actual {
        CHECKER.see(selector);
    }
    assert_eq!(actual, Ok(expected))
}

// Declares CHECKER, which `expect_ok` feeds every successfully parsed selector into.
// NOTE(review): presumably this verifies that each variant listed below is seen by at least one
// test in this module -- confirm against the `variants_checker!` macro definition.
variants_checker!(CHECKER = MdqRefSelector{
Section(_),
ListItem(_),
Link(_),
Image(_),
BlockQuote(_),
CodeBlock(_),
});
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions src/select/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod api;
mod base;
mod sel_block_quote;
mod sel_code_block;
mod sel_image;
mod sel_link;
mod sel_list_item;
Expand Down
107 changes: 107 additions & 0 deletions src/select/sel_code_block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use crate::matcher::{CharEnd, StringMatcher};
use crate::parsing_iter::ParsingIterator;
use crate::select::base::Selector;
use crate::select::{ParseResult, SELECTOR_SEPARATOR};
use crate::tree::{CodeBlock, CodeVariant};

/// Selector for code blocks: a block is selected only when both matchers accept it.
#[derive(Debug, PartialEq)]
pub struct CodeBlockSelector {
/// Matched against the code block's language (the empty string when the block has no options).
lang_matcher: StringMatcher,
/// Matched against the code block's `value`.
contents_matcher: StringMatcher,
}

impl CodeBlockSelector {
    /// Parses a code block selector from `iter`.
    ///
    /// The dispatcher has already consumed the first backtick, so the input is expected to
    /// start with the remaining two. An optional language follows immediately (no whitespace),
    /// then an optional contents matcher that runs up to `SELECTOR_SEPARATOR`.
    ///
    /// # Errors
    ///
    /// Propagates any error from the backtick check, the whitespace/separator check, or
    /// either `StringMatcher::read` call.
    pub fn read(iter: &mut ParsingIterator) -> ParseResult<Self> {
        iter.require_str("``")?; // first ` is from dispatcher

        // A language is only present when a non-whitespace char directly follows the fence.
        let has_language = matches!(iter.peek(), Some(ch) if !ch.is_whitespace());
        let lang_matcher = if has_language {
            StringMatcher::read(iter, CharEnd::AtWhitespace)?
        } else {
            StringMatcher::any()
        };

        iter.require_whitespace_or(SELECTOR_SEPARATOR, "```")?;
        let contents_matcher = StringMatcher::read(iter, SELECTOR_SEPARATOR)?;

        Ok(Self {
            lang_matcher,
            contents_matcher,
        })
    }
}

impl<'a> Selector<'a, &'a CodeBlock> for CodeBlockSelector {
    /// Returns `true` when `code_block` is a plain code block whose language and contents are
    /// both accepted by this selector's matchers.
    ///
    /// Math, TOML and YAML variants never match. A code block without options is treated as
    /// having the empty string for its language.
    fn matches(&self, code_block: &'a CodeBlock) -> bool {
        // Only the `Code` variant carries a language; every other variant is rejected up front.
        let code_opts = match &code_block.variant {
            CodeVariant::Code(opts) => opts,
            CodeVariant::Math { .. } | CodeVariant::Toml | CodeVariant::Yaml => return false,
        };
        let language = match code_opts {
            Some(opts) => &opts.language,
            None => "",
        };
        self.lang_matcher.matches(language) && self.contents_matcher.matches(&code_block.value)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    mod parsing {
        use super::*;
        use crate::matcher::StringMatcher;
        use crate::parsing_iter::ParsingIterator;

        // Note: every input starts with only two backticks, because the dispatcher consumes the
        // first one before `CodeBlockSelector::read` is called.

        #[test]
        fn only_backticks() {
            expect_parsed("``", StringMatcher::any(), StringMatcher::any());
        }

        #[test]
        fn only_language() {
            expect_parsed("``rust", StringMatcher::from("(?i)rust"), StringMatcher::any());
        }

        #[test]
        fn only_contents() {
            expect_parsed("`` foo", StringMatcher::any(), StringMatcher::from("(?i)foo"));
        }

        #[test]
        fn both() {
            expect_parsed(
                "``rust fizz",
                StringMatcher::from("(?i)rust"),
                StringMatcher::from("(?i)fizz"),
            );
        }

        /// Parses `input_str` and asserts that it yields a selector with exactly these matchers.
        #[track_caller]
        fn expect_parsed(input_str: &str, lang_matcher: StringMatcher, contents_matcher: StringMatcher) {
            let actual = CodeBlockSelector::read(&mut ParsingIterator::new(input_str)).unwrap();
            assert_eq!(
                actual,
                CodeBlockSelector {
                    lang_matcher,
                    contents_matcher,
                },
            )
        }
    }
}
6 changes: 6 additions & 0 deletions src/tree_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ impl<'a> From<&'a BlockQuote> for MdElemRef<'a> {
}
}

impl<'a> From<&'a CodeBlock> for MdElemRef<'a> {
fn from(value: &'a CodeBlock) -> Self {
MdElemRef::CodeBlock(value)
}
}

impl<'a> From<ListItemRef<'a>> for MdElemRef<'a> {
fn from(value: ListItemRef<'a>) -> Self {
MdElemRef::ListItem(value)
Expand Down

0 comments on commit 4df7dea

Please sign in to comment.