From 24ff5a2226aa36e8df90571378b792fdf3d97a69 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Sat, 21 May 2022 21:37:10 +1200 Subject: [PATCH] preinitialize slow regex --- pagefind/Cargo.toml | 1 + pagefind/src/fossick/mod.rs | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pagefind/Cargo.toml b/pagefind/Cargo.toml index 68bedca7..fcb3c85d 100644 --- a/pagefind/Cargo.toml +++ b/pagefind/Cargo.toml @@ -29,6 +29,7 @@ minifier = "0.0.43" sha-1 = "0.10" serde_json = "1" serde = { version = "1", features = ["derive"] } +lazy_static = "1.4.0" [dev-dependencies] json_dotpath = "1.1.0" diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs index 6b26533e..c02318b6 100644 --- a/pagefind/src/fossick/mod.rs +++ b/pagefind/src/fossick/mod.rs @@ -1,4 +1,5 @@ use hashbrown::HashMap; +use lazy_static::lazy_static; use lol_html::html_content::ContentType; use lol_html::{element, text, HtmlRewriter, Settings}; use regex::Regex; @@ -17,6 +18,12 @@ use crate::fragments::{PageFragment, PageFragmentData}; use crate::utils::full_hash; use crate::SearchOptions; +lazy_static! { + static ref EXTRANEOUS_NEWLINES: Regex = Regex::new("(^|\\s)*((\n|\r\n)\\s*)+($|\\s)*").unwrap(); + static ref TRIM_NEWLINES: Regex = Regex::new("^\n|\n$").unwrap(); + static ref EXTRANEOUS_SPACES: Regex = Regex::new("\\s{2,}").unwrap(); +} + pub struct FossickedData { pub file_path: PathBuf, pub fragment: PageFragment, @@ -205,13 +212,9 @@ fn build_url(page_url: &Path, options: &SearchOptions) -> String { } fn normalize_content(content: &str) -> String { - let extraneous_newlines = Regex::new("(^|\\s)*((\n|\r\n)\\s*)+($|\\s)*").unwrap(); - let trim_newlines = Regex::new("^\n|\n$").unwrap(); - let extraneous_spaces = Regex::new("\\s{2,}").unwrap(); - - let content = extraneous_newlines.replace_all(content, "\n"); - let content = trim_newlines.replace_all(&content, ""); - let content = extraneous_spaces.replace_all(&content, " "); + let content = EXTRANEOUS_NEWLINES.replace_all(content, "\n"); + let content = TRIM_NEWLINES.replace_all(&content, ""); + let content = EXTRANEOUS_SPACES.replace_all(&content, " "); content.to_string() }