Skip to content

Commit

Permalink
custom parsing function instead of regex for speed
Browse files Browse the repository at this point in the history
  • Loading branch information
ljeub-pometry committed Jan 19, 2024
1 parent 45e4a31 commit 207c2f1
Showing 1 changed file with 41 additions and 14 deletions.
55 changes: 41 additions & 14 deletions raphtory/src/core/entities/nodes/input_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,46 @@
use crate::core::utils::hashing;
use regex::Regex;
/// ASCII decimal digits of `u64::MAX` (`18446744073709551615`).
///
/// Used both as the maximum length of a decimal `u64` string and as the upper
/// bound that a 20-digit input is compared against.
const MAX_U64_BYTES: [u8; 20] = *b"18446744073709551615";

/// Parses `input` as a `u64` written in strict canonical decimal form.
///
/// Accepted inputs are exactly `"0"` or a non-empty run of ASCII digits whose
/// first digit is `1`..=`9` and whose value does not exceed `u64::MAX`.
///
/// Returns `None` for everything else: the empty string, leading zeros
/// (`"00"`, `"01"`), signs (`"+3"`, `"-1"`), any non-digit byte, and values
/// that do not fit in a `u64`.
fn parse_u64_strict(input: &str) -> Option<u64> {
    let digits = input.as_bytes();

    // Handle the empty string and everything that starts with '0' up front:
    // "0" is the only canonical spelling that begins with a zero.
    match digits {
        [] => return None,
        [b'0'] => return Some(0),
        [b'0', ..] => return None,
        _ => {}
    }

    // A u64 has at most 20 decimal digits, and every byte must be a digit.
    if digits.len() > MAX_U64_BYTES.len() || !digits.iter().all(u8::is_ascii_digit) {
        return None;
    }

    // For equal-length digit strings, lexicographic byte order is numeric
    // order, so a single slice comparison detects every 20-digit overflow.
    // (Bounding each digit individually would wrongly reject valid values
    // such as "10000000000000000009".)
    if digits.len() == MAX_U64_BYTES.len() && digits > &MAX_U64_BYTES[..] {
        return None;
    }

    // The value is now known to fit, so every prefix fits as well and the
    // accumulation below cannot overflow.
    Some(
        digits
            .iter()
            .fold(0u64, |acc, &digit| acc * 10 + u64::from(digit - b'0')),
    )
}

pub trait InputNode: Clone {
fn id(&self) -> u64;
Expand All @@ -24,12 +64,7 @@ impl InputNode for u64 {

impl<'a> InputNode for &'a str {
fn id(&self) -> u64 {
let positive_strict_num = Regex::new(r"^[1-9][0-9]*$").unwrap();
if *self == "0" || positive_strict_num.is_match(self) {
self.parse().unwrap()
} else {
hashing::calculate_hash(self)
}
parse_u64_strict(self).unwrap_or_else(|| hashing::calculate_hash(self))
}

fn id_str(&self) -> Option<&str> {
Expand Down Expand Up @@ -60,12 +95,4 @@ mod test {
assert_ne!("00".id(), "0".id());
assert_eq!("0".id(), 0.id());
}

#[test]
fn test_num_regex() {
let re = Regex::new(r"^[1-9][0-9]*$").unwrap();
assert!(re.is_match("10"));
assert!(!re.is_match("00"));
assert!(!re.is_match("+3"));
}
}

0 comments on commit 207c2f1

Please sign in to comment.