Skip to content

Commit

Permalink
Fix string id edge cases (#1460)
Browse files Browse the repository at this point in the history
* fix number detection edge cases for string to id conversion and add some tests

* fix the test that was testing broken behaviour

* custom parsing function instead of regex for speed
  • Loading branch information
ljeub-pometry authored Jan 19, 2024
1 parent 4ee9c1a commit f26aa6f
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 7 deletions.
4 changes: 2 additions & 2 deletions python/tests/test_graphdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -1970,8 +1970,8 @@ def test_leading_zeroes_ids():
g = Graph()
g.add_node(0, 0)
g.add_node(1, "0")
assert g.node(0).history() == [0]
assert g.nodes.name.collect() == ["0", "0"]
assert g.node(0).history() == [0, 1]
assert g.nodes.name.collect() == ["0"]

# g = Graph()
# g.add_node(0, 1)
Expand Down
61 changes: 56 additions & 5 deletions raphtory/src/core/entities/nodes/input_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,47 @@
//! `u64`, `&str`, and `String`.
use crate::core::utils::hashing;
use regex::Regex;
/// ASCII digits of `u64::MAX` (`18446744073709551615`). Its length is the maximum
/// number of bytes a canonical decimal `u64` string can have.
const MAX_U64_BYTES: [u8; 20] = *b"18446744073709551615";

/// Parses `input` as a `u64`, accepting only the canonical decimal spelling.
///
/// Returns `Some(value)` when `input` consists solely of ASCII digits, has no
/// redundant leading zero (the single string `"0"` is the only accepted input that
/// starts with `0`) and the value fits in a `u64`. Returns `None` for everything
/// else: the empty string, signs (`"+3"`, `"-1"`), whitespace, non-digit bytes,
/// leading zeroes (`"00"`, `"07"`) and values larger than `u64::MAX`.
fn parse_u64_strict(input: &str) -> Option<u64> {
    let digits = input.as_bytes();
    match digits {
        // Empty input, or a zero followed by anything, is never canonical.
        [] | [b'0', _, ..] => return None,
        [b'0'] => return Some(0),
        _ => {}
    }
    // Cheap rejection of long strings before touching the individual bytes.
    if digits.len() > MAX_U64_BYTES.len() {
        return None;
    }
    // Checked arithmetic detects overflow exactly. Comparing digit-by-digit against
    // the digits of `u64::MAX` would wrongly reject values such as
    // 10000000000000000009, whose last digit exceeds the last digit of `u64::MAX`.
    digits.iter().try_fold(0u64, |value, &byte| {
        if !byte.is_ascii_digit() {
            return None;
        }
        value.checked_mul(10)?.checked_add(u64::from(byte - b'0'))
    })
}

pub trait InputNode: Clone {
fn id(&self) -> u64;
Expand All @@ -23,11 +64,7 @@ impl InputNode for u64 {

impl<'a> InputNode for &'a str {
/// Returns the numeric id for this string.
///
/// When `parse_u64_strict` accepts the string, the parsed number is used directly;
/// otherwise the id is `hashing::calculate_hash` of the string. The hash is only
/// computed when parsing fails.
fn id(&self) -> u64 {
    parse_u64_strict(self).unwrap_or_else(|| hashing::calculate_hash(self))
}

fn id_str(&self) -> Option<&str> {
Expand All @@ -45,3 +82,17 @@ impl InputNode for String {
Some(self)
}
}

#[cfg(test)]
mod test {
    use crate::core::entities::nodes::input_node::InputNode;
    use regex::Regex;

    /// Checks which string spellings share an id with the corresponding integer.
    #[test]
    fn test_weird_num_edge_cases() {
        let three_from_str = "3".id();
        let zero_from_str = "0".id();

        // A leading sign must not collapse onto the plain number.
        assert_ne!("+3".id(), three_from_str);
        // Plain digits agree with the integer id.
        assert_eq!(3.id(), three_from_str);
        // A redundant leading zero must not collapse onto zero.
        assert_ne!("00".id(), zero_from_str);
        // A single zero agrees with the integer id.
        assert_eq!(zero_from_str, 0.id());
    }
}

0 comments on commit f26aa6f

Please sign in to comment.