Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement smart punctuation #470

Merged
merged 11 commits into from
Sep 1, 2020
24 changes: 24 additions & 0 deletions benches/html_rendering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ use std::str::from_utf8;
static CRDT_BYTES: &[u8] = include_bytes!("../third_party/xi-editor/crdt.md");

fn criterion_benchmark(c: &mut Criterion) {
let mut full_opts = Options::empty();
full_opts.insert(Options::ENABLE_TABLES);
full_opts.insert(Options::ENABLE_FOOTNOTES);
full_opts.insert(Options::ENABLE_STRIKETHROUGH);
full_opts.insert(Options::ENABLE_TASKLISTS);
full_opts.insert(Options::ENABLE_SMART_PUNCTUATION);

c.bench_function("crdt_total", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();
let mut buf = String::with_capacity(input.len() * 3 / 2);
Expand All @@ -30,12 +37,29 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("crdt_all_options_parse", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();

b.iter(|| Parser::new_ext(input, full_opts).count())
});

c.bench_function("crdt_parse", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();

b.iter(|| Parser::new_ext(input, Options::empty()).count())
});

c.bench_function("smart_punctuation", |b| {
let input = r#"""'This here a real "quote"'

And -- if you're interested -- some em-dashes. Wait --- she actually said that?

Wow... Becky is so 'mean'!
"""#;

b.iter(|| Parser::new_ext(input, full_opts).count());
});

c.bench_function("links_n_emphasis", |b| {
let input = r#"""This is a [link](example.com). **Cool!**

Expand Down
51 changes: 25 additions & 26 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ fn generate_tests_from_spec() {
// and make it easy to eventually add other hardcoded paths in the future if needed
let hardcoded = [
"./third_party/CommonMark/spec.txt",
"./third_party/CommonMark/smart_punct.txt",
"./third_party/GitHub/gfm_table.txt",
"./third_party/GitHub/gfm_strikethrough.txt",
"./third_party/GitHub/gfm_tasklist.txt",
];
let hardcoded_iter = hardcoded.into_iter().map(PathBuf::from);
let hardcoded_iter = hardcoded.iter().map(PathBuf::from);

// Create an iterator over the files in the specs/ directory that have a .txt extension
let spec_files = fs::read_dir("./specs")
Expand Down Expand Up @@ -82,13 +83,14 @@ fn {}_test_{i}() {{
let original = r##"{original}"##;
let expected = r##"{expected}"##;

test_markdown_html(original, expected);
test_markdown_html(original, expected, {smart_punct});
}}
"###,
spec_name,
i = i + 1,
original = testcase.original,
expected = testcase.expected
expected = testcase.expected,
smart_punct = testcase.smart_punct,
))
.unwrap();

Expand Down Expand Up @@ -133,14 +135,15 @@ pub struct Spec<'a> {
#[cfg(feature = "gen-tests")]
impl<'a> Spec<'a> {
pub fn new(spec: &'a str) -> Self {
Spec { spec: spec }
Spec { spec }
}
}

#[cfg(feature = "gen-tests")]
/// One example block extracted from a spec file by the `Spec` iterator.
///
/// Each field is substituted into the generated test function and handed to
/// `test_markdown_html(original, expected, smart_punct)`.
pub struct TestCase {
/// Text between the opening example fence line and the `.` separator line,
/// with every `→` replaced by a tab character.
pub original: String,
/// Text between the `.` separator line and the closing fence,
/// with every `→` replaced by a tab character.
pub expected: String,
/// `true` when the opening fence line ends in `example_smartpunct`,
/// `false` when it ends in plain `example`.
pub smart_punct: bool,
}

#[cfg(feature = "gen-tests")]
Expand All @@ -149,37 +152,33 @@ impl<'a> Iterator for Spec<'a> {

fn next(&mut self) -> Option<TestCase> {
let spec = self.spec;

let i_start = match self
.spec
.find("```````````````````````````````` example\n")
.map(|pos| pos + 41)
{
Some(pos) => pos,
None => return None,
};

let i_end = match self.spec[i_start..]
let prefix = "```````````````````````````````` example";

let (i_start, smart_punct) = self.spec.find(prefix).and_then(|pos| {
let suffix = "_smartpunct\n";
if spec[(pos + prefix.len())..].starts_with(suffix) {
Some((pos + prefix.len() + suffix.len(), true))
} else if spec[(pos + prefix.len())..].starts_with('\n') {
Some((pos + prefix.len() + 1, false))
} else {
None
}
})?;

let i_end = self.spec[i_start..]
.find("\n.\n")
.map(|pos| (pos + 1) + i_start)
{
Some(pos) => pos,
None => return None,
};
.map(|pos| (pos + 1) + i_start)?;

let e_end = match self.spec[i_end + 2..]
let e_end = self.spec[i_end + 2..]
.find("````````````````````````````````\n")
.map(|pos| pos + i_end + 2)
{
Some(pos) => pos,
None => return None,
};
.map(|pos| pos + i_end + 2)?;

self.spec = &self.spec[e_end + 33..];

let test_case = TestCase {
original: spec[i_start..i_end].to_string().replace("→", "\t"),
expected: spec[i_end + 2..e_end].to_string().replace("→", "\t"),
smart_punct,
};

Some(test_case)
Expand Down
Loading