Skip to content

Commit

Permalink
Performance: create regexp as a top-level static
Browse files Browse the repository at this point in the history
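Previously the tokenization regexp was compiled on every call to `tokenize`;
compiling it once as a top-level static seems to be a huge performance
improvement: `make benchmark` changes from 1.7s to 0.9s.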
  • Loading branch information
dandavison committed May 18, 2020
1 parent 1380ef6 commit 2cb3e40
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/edits.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use regex::Regex;

use lazy_static::lazy_static;
use unicode_width::UnicodeWidthStr;

use crate::align;
Expand Down Expand Up @@ -75,13 +76,16 @@ where
(annotated_minus_lines, annotated_plus_lines)
}

// Separator pattern used by `tokenize`. It is declared as a `lazy_static!` static so that
// the regex is compiled once and shared, rather than being rebuilt inside `tokenize`.
lazy_static! {
/// Matches a run of one or more separator characters: tab, space, `,`, `;`, `.`, `:`,
/// parentheses, square brackets, angle brackets, `/`, `'`, `"` and `-`.
///
/// The pattern is a fixed literal, so the `unwrap` can only panic if the literal itself
/// is not a valid regex.
static ref TOKENIZATION_REGEXP: Regex = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
}

/// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings,
/// not individual characters.
fn tokenize(line: &str) -> Vec<&str> {
let separators = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
let mut tokens = Vec::new();
let mut offset = 0;
for m in separators.find_iter(line) {
for m in TOKENIZATION_REGEXP.find_iter(line) {
tokens.push(&line[offset..m.start()]);
// Align separating text as multiple single-character tokens.
for i in m.start()..m.end() {
Expand Down

0 comments on commit 2cb3e40

Please sign in to comment.