From 2d4ec099c1d3111dd5e2ca6b324a67a334ad4744 Mon Sep 17 00:00:00 2001 From: kirk Date: Thu, 28 Mar 2024 19:33:07 +0000 Subject: [PATCH] improve levenshtein implementation --- src/collections/ordered_array_like.rs | 179 +++++++++++++++----------- src/collections/unordered_map_like.rs | 86 +++++-------- 2 files changed, 136 insertions(+), 129 deletions(-) diff --git a/src/collections/ordered_array_like.rs b/src/collections/ordered_array_like.rs index 6a0e51f..5d7c456 100644 --- a/src/collections/ordered_array_like.rs +++ b/src/collections/ordered_array_like.rs @@ -90,98 +90,123 @@ fn print_table(table: &Vec>) { pub fn levenshtein<'src, 'target: 'src, T: Clone + PartialEq + Debug + 'target>( target: impl IntoIterator, source: impl IntoIterator, -) -> Option> { - let target = target.into_iter().collect::>(); - let source = source.into_iter().collect::>(); - let mut table = vec![vec![ChangeInternal::NoOp(0); source.len() + 1]; target.len() + 1]; - - for (i, entry) in table.iter_mut().enumerate().skip(1) { - entry[0] = ChangeInternal::Insert(i); - } +) -> Option> { + #[inline] + fn create_full_change_table( + target: &Vec<&T>, + source: &Vec<&T>, + ) -> Vec> { + let mut table = vec![vec![ChangeInternal::NoOp(0); source.len() + 1]; target.len() + 1]; + + for (i, entry) in table.iter_mut().enumerate().skip(1) { + entry[0] = ChangeInternal::Insert(i); + } - for j in 0..=source.len() { - table[0][j] = ChangeInternal::Delete(j) - } + for j in 0..=source.len() { + table[0][j] = ChangeInternal::Delete(j) + } - // create cost table - for target_index in 1..=target.len() { - let target_entry = target[target_index - 1]; - for source_index in 1..=source.len() { - let source_entry = source[source_index - 1]; - - if target_entry == source_entry { - table[target_index][source_index] = - ChangeInternal::NoOp(table[target_index - 1][source_index - 1].cost()); - // char matches, skip comparisons - continue; - } + // create cost table + for target_index in 1..=target.len() { + let target_entry = target[target_index - 1]; + for source_index in 1..=source.len() { + let source_entry = source[source_index - 1]; + + if target_entry == source_entry { + table[target_index][source_index] = + ChangeInternal::NoOp(table[target_index - 1][source_index - 1].cost()); + // char matches, skip comparisons + continue; + } - let insert = table[target_index - 1][source_index].cost(); - let delete = table[target_index][source_index - 1].cost(); - let replace = table[target_index - 1][source_index - 1].cost(); - let min = insert.min(delete).min(replace); - - if min == replace { - table[target_index][source_index] = ChangeInternal::Replace(min + 1); - } else if min == delete { - table[target_index][source_index] = ChangeInternal::Delete(min + 1); - } else { - table[target_index][source_index] = ChangeInternal::Insert(min + 1); + let insert = table[target_index - 1][source_index].cost(); + let delete = table[target_index][source_index - 1].cost(); + let replace = table[target_index - 1][source_index - 1].cost(); + let min = insert.min(delete).min(replace); + + if min == replace { + table[target_index][source_index] = ChangeInternal::Replace(min + 1); + } else if min == delete { + table[target_index][source_index] = ChangeInternal::Delete(min + 1); + } else { + table[target_index][source_index] = ChangeInternal::Insert(min + 1); + } } } + table } - let mut target_pos = target.len(); - let mut source_pos = source.len(); - let mut changelist = Vec::new(); + #[inline] + fn changelist_from_change_table<'target, T: PartialEq>( + table: Vec>, + target: &Vec<&'target T>, + source: &Vec<&T>, + ) -> Vec> { + let mut target_pos = target.len(); + let mut source_pos = source.len(); + let mut changelist = Vec::with_capacity( + table + .last() + .and_then(|r| r.last()) + .map(|c| c.cost()) + .unwrap_or_default(), + ); - // collect required changes to make source into target - while target_pos > 0 && source_pos > 0 { - match &(table[target_pos][source_pos]) { - ChangeInternal::NoOp(_) => { - target_pos -= 1; - source_pos -= 1; - } - ChangeInternal::Replace(_) => { - changelist.push(OrderedArrayLikeChangeRef::Replace( - target[target_pos - 1], - source_pos - 1, - )); - target_pos -= 1; - source_pos -= 1; - } - ChangeInternal::Insert(_) => { - changelist.push(OrderedArrayLikeChangeRef::Insert( - target[target_pos - 1], - source_pos, - )); - target_pos -= 1; + // collect required changes to make source into target + while target_pos > 0 && source_pos > 0 { + match &(table[target_pos][source_pos]) { + ChangeInternal::NoOp(_) => { + target_pos -= 1; + source_pos -= 1; + } + ChangeInternal::Replace(_) => { + changelist.push(OrderedArrayLikeChangeRef::Replace( + target[target_pos - 1], + source_pos - 1, + )); + target_pos -= 1; + source_pos -= 1; + } + ChangeInternal::Insert(_) => { + changelist.push(OrderedArrayLikeChangeRef::Insert( + target[target_pos - 1], + source_pos, + )); + target_pos -= 1; + } + ChangeInternal::Delete(_) => { + changelist.push(OrderedArrayLikeChangeRef::Delete(source_pos - 1, None)); + source_pos -= 1; + } } - ChangeInternal::Delete(_) => { - changelist.push(OrderedArrayLikeChangeRef::Delete(source_pos - 1, None)); - source_pos -= 1; + if changelist.len() == table[target.len()][source.len()].cost() { + target_pos = 0; + source_pos = 0; + break; } } - if changelist.len() == table[target.len()][source.len()].cost() { - target_pos = 0; - source_pos = 0; - break; + + // target is longer than source, add the missing elements + while target_pos > 0 { + changelist.push(OrderedArrayLikeChangeRef::Insert( + target[target_pos - 1], + source_pos, + )); + target_pos -= 1; + } + + // source is longer than target, remove the extra elements + if source_pos > 0 { + changelist.push(OrderedArrayLikeChangeRef::Delete(0, Some(source_pos - 1))); } - } - // target is longer than source, add the missing elements - while target_pos > 0 { - changelist.push(OrderedArrayLikeChangeRef::Insert( - target[target_pos - 1], - source_pos, - )); - target_pos -= 1; + changelist } - // source is longer than target, remove the extra elements - if source_pos > 0 { - changelist.push(OrderedArrayLikeChangeRef::Delete(0, Some(source_pos - 1))); - } + let target = target.into_iter().collect::>(); + let source = source.into_iter().collect::>(); + let table = create_full_change_table(&target, &source); + let changelist = changelist_from_change_table(table, &target, &source); match changelist.is_empty() { true => None, diff --git a/src/collections/unordered_map_like.rs b/src/collections/unordered_map_like.rs index 4274d99..b247cd1 100644 --- a/src/collections/unordered_map_like.rs +++ b/src/collections/unordered_map_like.rs @@ -24,22 +24,12 @@ impl<'a, K: Clone, V: Clone> From> { fn from(value: UnorderedMapLikeChange<&'a K, &'a V>) -> Self { match value { - UnorderedMapLikeChange::InsertMany( - key, - value, - count, - ) => UnorderedMapLikeChange::InsertMany( - key.clone(), - value.clone(), - count, - ), - UnorderedMapLikeChange::RemoveMany( - key, - count, - ) => UnorderedMapLikeChange::RemoveMany( - key.clone(), - count, - ), + UnorderedMapLikeChange::InsertMany(key, value, count) => { + UnorderedMapLikeChange::InsertMany(key.clone(), value.clone(), count) + } + UnorderedMapLikeChange::RemoveMany(key, count) => { + UnorderedMapLikeChange::RemoveMany(key.clone(), count) + } UnorderedMapLikeChange::InsertSingle(key, value) => { UnorderedMapLikeChange::InsertSingle(key.clone(), value.clone()) } @@ -142,21 +132,10 @@ impl UnorderedMapLikeChange { debug_assert_ne!(count, 0); match (insert_or_remove, count) { (Operation::Insert, 1) => UnorderedMapLikeChange::InsertSingle(item.0, item.1), - (Operation::Insert, val) => { - UnorderedMapLikeChange::InsertMany( - item.0, - item.1, - val, - ) - } + (Operation::Insert, val) => UnorderedMapLikeChange::InsertMany(item.0, item.1, val), (Operation::Remove, 1) => UnorderedMapLikeChange::RemoveSingle(item.0), - (Operation::Remove, val) => { - UnorderedMapLikeChange::RemoveMany( - item.0, - val, - ) - } + (Operation::Remove, val) => UnorderedMapLikeChange::RemoveMany(item.0, val), } } } @@ -279,10 +258,8 @@ pub fn apply_unordered_hashdiffs< }; let (insertions, removals): (Vec<_>, Vec<_>) = diffs.into_iter().partition(|x| match &x { - UnorderedMapLikeChange::InsertMany(..) - | UnorderedMapLikeChange::InsertSingle(..) => true, - UnorderedMapLikeChange::RemoveMany(..) - | UnorderedMapLikeChange::RemoveSingle(..) => false, + UnorderedMapLikeChange::InsertMany(..) | UnorderedMapLikeChange::InsertSingle(..) => true, + UnorderedMapLikeChange::RemoveMany(..) | UnorderedMapLikeChange::RemoveSingle(..) => false, }); let holder: Vec<_> = list.into_iter().collect(); // let ref_holder: Vec<_> = holder.iter().map(|(k, v)| (k, v)).collect(); @@ -290,9 +267,7 @@ pub fn apply_unordered_hashdiffs< for remove in removals { match remove { - UnorderedMapLikeChange::RemoveMany( - key, count - ) => match list_hash.get_mut(&key) { + UnorderedMapLikeChange::RemoveMany(key, count) => match list_hash.get_mut(&key) { Some(val) if val.1 > count => { val.1 -= count; } @@ -316,18 +291,16 @@ pub fn apply_unordered_hashdiffs< for insertion in insertions.iter() { match insertion { - UnorderedMapLikeChange::InsertMany( - key, - value, - count, - ) => match list_hash.get_mut(&key) { - Some(val) => { - val.1 += count; - } - None => { - list_hash.insert(key, (value, *count)); + UnorderedMapLikeChange::InsertMany(key, value, count) => { + match list_hash.get_mut(&key) { + Some(val) => { + val.1 += count; + } + None => { + list_hash.insert(key, (value, *count)); + } } - }, + } UnorderedMapLikeChange::InsertSingle(key, value) => match list_hash.get_mut(&key) { Some(val) => { val.1 += 1; @@ -355,8 +328,7 @@ pub fn apply_unordered_hashdiffs< #[cfg(feature = "nanoserde")] mod nanoserde_impls { use super::{ - DeBin, SerBin, UnorderedMapLikeChange, UnorderedMapLikeDiff, - UnorderedMapLikeDiffInternal, + DeBin, SerBin, UnorderedMapLikeChange, UnorderedMapLikeDiff, UnorderedMapLikeDiffInternal, }; impl SerBin for UnorderedMapLikeChange @@ -484,9 +456,19 @@ mod nanoserde_impls { ) -> Result, nanoserde::DeBinErr> { let id: u8 = DeBin::de_bin(offset, bytes)?; core::result::Result::Ok(match id { - 0_u8 => UnorderedMapLikeChange::InsertMany(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?), - 1_u8 => UnorderedMapLikeChange::RemoveMany(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?), - 2_u8 => UnorderedMapLikeChange::InsertSingle(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?), + 0_u8 => UnorderedMapLikeChange::InsertMany( + DeBin::de_bin(offset, bytes)?, + DeBin::de_bin(offset, bytes)?, + DeBin::de_bin(offset, bytes)?, + ), + 1_u8 => UnorderedMapLikeChange::RemoveMany( + DeBin::de_bin(offset, bytes)?, + DeBin::de_bin(offset, bytes)?, + ), + 2_u8 => UnorderedMapLikeChange::InsertSingle( + DeBin::de_bin(offset, bytes)?, + DeBin::de_bin(offset, bytes)?, + ), 3_u8 => UnorderedMapLikeChange::RemoveSingle(DeBin::de_bin(offset, bytes)?), _ => { return core::result::Result::Err(nanoserde::DeBinErr {