Skip to content

Commit

Permalink
improve levenshtein implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
knickish committed Mar 31, 2024
1 parent 5fd9126 commit 2d4ec09
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 129 deletions.
179 changes: 102 additions & 77 deletions src/collections/ordered_array_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,98 +90,123 @@ fn print_table(table: &Vec<Vec<ChangeInternal>>) {
pub fn levenshtein<'src, 'target: 'src, T: Clone + PartialEq + Debug + 'target>(
target: impl IntoIterator<Item = &'target T>,
source: impl IntoIterator<Item = &'src T>,
) -> Option<OrderedArrayLikeDiffRef<'src, T>> {
let target = target.into_iter().collect::<Vec<_>>();
let source = source.into_iter().collect::<Vec<_>>();
let mut table = vec![vec![ChangeInternal::NoOp(0); source.len() + 1]; target.len() + 1];

for (i, entry) in table.iter_mut().enumerate().skip(1) {
entry[0] = ChangeInternal::Insert(i);
}
) -> Option<OrderedArrayLikeDiffRef<'target, T>> {
#[inline]
fn create_full_change_table<T: PartialEq>(
target: &Vec<&T>,

Check warning on line 96 in src/collections/ordered_array_like.rs

View workflow job for this annotation

GitHub Actions / Clippy (ubuntu-latest, x86_64-unknown-linux-gnu)

writing `&Vec` instead of `&[_]` involves a new object where a slice will do
source: &Vec<&T>,

Check warning on line 97 in src/collections/ordered_array_like.rs

View workflow job for this annotation

GitHub Actions / Clippy (ubuntu-latest, x86_64-unknown-linux-gnu)

writing `&Vec` instead of `&[_]` involves a new object where a slice will do
) -> Vec<Vec<ChangeInternal>> {
let mut table = vec![vec![ChangeInternal::NoOp(0); source.len() + 1]; target.len() + 1];

for (i, entry) in table.iter_mut().enumerate().skip(1) {
entry[0] = ChangeInternal::Insert(i);
}

for j in 0..=source.len() {
table[0][j] = ChangeInternal::Delete(j)
}
for j in 0..=source.len() {
table[0][j] = ChangeInternal::Delete(j)
}

// create cost table
for target_index in 1..=target.len() {
let target_entry = target[target_index - 1];
for source_index in 1..=source.len() {
let source_entry = source[source_index - 1];

if target_entry == source_entry {
table[target_index][source_index] =
ChangeInternal::NoOp(table[target_index - 1][source_index - 1].cost());
// char matches, skip comparisons
continue;
}
// create cost table
for target_index in 1..=target.len() {
let target_entry = target[target_index - 1];
for source_index in 1..=source.len() {
let source_entry = source[source_index - 1];

if target_entry == source_entry {
table[target_index][source_index] =
ChangeInternal::NoOp(table[target_index - 1][source_index - 1].cost());
// char matches, skip comparisons
continue;
}

let insert = table[target_index - 1][source_index].cost();
let delete = table[target_index][source_index - 1].cost();
let replace = table[target_index - 1][source_index - 1].cost();
let min = insert.min(delete).min(replace);

if min == replace {
table[target_index][source_index] = ChangeInternal::Replace(min + 1);
} else if min == delete {
table[target_index][source_index] = ChangeInternal::Delete(min + 1);
} else {
table[target_index][source_index] = ChangeInternal::Insert(min + 1);
let insert = table[target_index - 1][source_index].cost();
let delete = table[target_index][source_index - 1].cost();
let replace = table[target_index - 1][source_index - 1].cost();
let min = insert.min(delete).min(replace);

if min == replace {
table[target_index][source_index] = ChangeInternal::Replace(min + 1);
} else if min == delete {
table[target_index][source_index] = ChangeInternal::Delete(min + 1);
} else {
table[target_index][source_index] = ChangeInternal::Insert(min + 1);
}
}
}
table
}

let mut target_pos = target.len();
let mut source_pos = source.len();
let mut changelist = Vec::new();
#[inline]
fn changelist_from_change_table<'target, T: PartialEq>(
table: Vec<Vec<ChangeInternal>>,
target: &Vec<&'target T>,

Check warning on line 142 in src/collections/ordered_array_like.rs

View workflow job for this annotation

GitHub Actions / Clippy (ubuntu-latest, x86_64-unknown-linux-gnu)

writing `&Vec` instead of `&[_]` involves a new object where a slice will do
source: &Vec<&T>,

Check warning on line 143 in src/collections/ordered_array_like.rs

View workflow job for this annotation

GitHub Actions / Clippy (ubuntu-latest, x86_64-unknown-linux-gnu)

writing `&Vec` instead of `&[_]` involves a new object where a slice will do
) -> Vec<OrderedArrayLikeChangeRef<'target, T>> {
let mut target_pos = target.len();
let mut source_pos = source.len();
let mut changelist = Vec::with_capacity(
table
.last()
.and_then(|r| r.last())
.map(|c| c.cost())
.unwrap_or_default(),
);

// collect required changes to make source into target
while target_pos > 0 && source_pos > 0 {
match &(table[target_pos][source_pos]) {
ChangeInternal::NoOp(_) => {
target_pos -= 1;
source_pos -= 1;
}
ChangeInternal::Replace(_) => {
changelist.push(OrderedArrayLikeChangeRef::Replace(
target[target_pos - 1],
source_pos - 1,
));
target_pos -= 1;
source_pos -= 1;
}
ChangeInternal::Insert(_) => {
changelist.push(OrderedArrayLikeChangeRef::Insert(
target[target_pos - 1],
source_pos,
));
target_pos -= 1;
// collect required changes to make source into target
while target_pos > 0 && source_pos > 0 {
match &(table[target_pos][source_pos]) {
ChangeInternal::NoOp(_) => {
target_pos -= 1;
source_pos -= 1;
}
ChangeInternal::Replace(_) => {
changelist.push(OrderedArrayLikeChangeRef::Replace(
target[target_pos - 1],
source_pos - 1,
));
target_pos -= 1;
source_pos -= 1;
}
ChangeInternal::Insert(_) => {
changelist.push(OrderedArrayLikeChangeRef::Insert(
target[target_pos - 1],
source_pos,
));
target_pos -= 1;
}
ChangeInternal::Delete(_) => {
changelist.push(OrderedArrayLikeChangeRef::Delete(source_pos - 1, None));
source_pos -= 1;
}
}
ChangeInternal::Delete(_) => {
changelist.push(OrderedArrayLikeChangeRef::Delete(source_pos - 1, None));
source_pos -= 1;
if changelist.len() == table[target.len()][source.len()].cost() {
target_pos = 0;
source_pos = 0;
break;
}
}
if changelist.len() == table[target.len()][source.len()].cost() {
target_pos = 0;
source_pos = 0;
break;

// target is longer than source, add the missing elements
while target_pos > 0 {
changelist.push(OrderedArrayLikeChangeRef::Insert(
target[target_pos - 1],
source_pos,
));
target_pos -= 1;
}

// source is longer than target, remove the extra elements
if source_pos > 0 {
changelist.push(OrderedArrayLikeChangeRef::Delete(0, Some(source_pos - 1)));
}
}

// target is longer than source, add the missing elements
while target_pos > 0 {
changelist.push(OrderedArrayLikeChangeRef::Insert(
target[target_pos - 1],
source_pos,
));
target_pos -= 1;
changelist
}

// source is longer than target, remove the extra elements
if source_pos > 0 {
changelist.push(OrderedArrayLikeChangeRef::Delete(0, Some(source_pos - 1)));
}
let target = target.into_iter().collect::<Vec<_>>();
let source = source.into_iter().collect::<Vec<_>>();
let table = create_full_change_table(&target, &source);
let changelist = changelist_from_change_table(table, &target, &source);

match changelist.is_empty() {
true => None,
Expand Down
86 changes: 34 additions & 52 deletions src/collections/unordered_map_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,12 @@ impl<'a, K: Clone, V: Clone> From<UnorderedMapLikeChange<&'a K, &'a V>>
{
fn from(value: UnorderedMapLikeChange<&'a K, &'a V>) -> Self {
match value {
UnorderedMapLikeChange::InsertMany(
key,
value,
count,
) => UnorderedMapLikeChange::InsertMany(
key.clone(),
value.clone(),
count,
),
UnorderedMapLikeChange::RemoveMany(
key,
count,
) => UnorderedMapLikeChange::RemoveMany(
key.clone(),
count,
),
UnorderedMapLikeChange::InsertMany(key, value, count) => {
UnorderedMapLikeChange::InsertMany(key.clone(), value.clone(), count)
}
UnorderedMapLikeChange::RemoveMany(key, count) => {
UnorderedMapLikeChange::RemoveMany(key.clone(), count)
}
UnorderedMapLikeChange::InsertSingle(key, value) => {
UnorderedMapLikeChange::InsertSingle(key.clone(), value.clone())
}
Expand Down Expand Up @@ -142,21 +132,10 @@ impl<K, V> UnorderedMapLikeChange<K, V> {
debug_assert_ne!(count, 0);
match (insert_or_remove, count) {
(Operation::Insert, 1) => UnorderedMapLikeChange::InsertSingle(item.0, item.1),
(Operation::Insert, val) => {
UnorderedMapLikeChange::InsertMany(
item.0,
item.1,
val,
)
}
(Operation::Insert, val) => UnorderedMapLikeChange::InsertMany(item.0, item.1, val),
(Operation::Remove, 1) => UnorderedMapLikeChange::RemoveSingle(item.0),

(Operation::Remove, val) => {
UnorderedMapLikeChange::RemoveMany(
item.0,
val,
)
}
(Operation::Remove, val) => UnorderedMapLikeChange::RemoveMany(item.0, val),
}
}
}
Expand Down Expand Up @@ -279,20 +258,16 @@ pub fn apply_unordered_hashdiffs<
};

let (insertions, removals): (Vec<_>, Vec<_>) = diffs.into_iter().partition(|x| match &x {
UnorderedMapLikeChange::InsertMany(..)
| UnorderedMapLikeChange::InsertSingle(..) => true,
UnorderedMapLikeChange::RemoveMany(..)
| UnorderedMapLikeChange::RemoveSingle(..) => false,
UnorderedMapLikeChange::InsertMany(..) | UnorderedMapLikeChange::InsertSingle(..) => true,
UnorderedMapLikeChange::RemoveMany(..) | UnorderedMapLikeChange::RemoveSingle(..) => false,
});
let holder: Vec<_> = list.into_iter().collect();
// let ref_holder: Vec<_> = holder.iter().map(|(k, v)| (k, v)).collect();
let mut list_hash = collect_into_key_eq_map(holder.iter().map(|t| (&t.0, &t.1)));

for remove in removals {
match remove {
UnorderedMapLikeChange::RemoveMany(
key, count
) => match list_hash.get_mut(&key) {
UnorderedMapLikeChange::RemoveMany(key, count) => match list_hash.get_mut(&key) {
Some(val) if val.1 > count => {
val.1 -= count;
}
Expand All @@ -316,18 +291,16 @@ pub fn apply_unordered_hashdiffs<

for insertion in insertions.iter() {
match insertion {
UnorderedMapLikeChange::InsertMany(
key,
value,
count,
) => match list_hash.get_mut(&key) {
Some(val) => {
val.1 += count;
}
None => {
list_hash.insert(key, (value, *count));
UnorderedMapLikeChange::InsertMany(key, value, count) => {
match list_hash.get_mut(&key) {
Some(val) => {
val.1 += count;
}
None => {
list_hash.insert(key, (value, *count));
}
}
},
}
UnorderedMapLikeChange::InsertSingle(key, value) => match list_hash.get_mut(&key) {
Some(val) => {
val.1 += 1;
Expand Down Expand Up @@ -355,8 +328,7 @@ pub fn apply_unordered_hashdiffs<
#[cfg(feature = "nanoserde")]
mod nanoserde_impls {
use super::{
DeBin, SerBin, UnorderedMapLikeChange, UnorderedMapLikeDiff,
UnorderedMapLikeDiffInternal,
DeBin, SerBin, UnorderedMapLikeChange, UnorderedMapLikeDiff, UnorderedMapLikeDiffInternal,
};

impl<K, V> SerBin for UnorderedMapLikeChange<K, V>
Expand Down Expand Up @@ -484,9 +456,19 @@ mod nanoserde_impls {
) -> Result<UnorderedMapLikeChange<K, V>, nanoserde::DeBinErr> {
let id: u8 = DeBin::de_bin(offset, bytes)?;
core::result::Result::Ok(match id {
0_u8 => UnorderedMapLikeChange::InsertMany(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?),
1_u8 => UnorderedMapLikeChange::RemoveMany(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?),
2_u8 => UnorderedMapLikeChange::InsertSingle(DeBin::de_bin(offset, bytes)?, DeBin::de_bin(offset, bytes)?),
0_u8 => UnorderedMapLikeChange::InsertMany(
DeBin::de_bin(offset, bytes)?,
DeBin::de_bin(offset, bytes)?,
DeBin::de_bin(offset, bytes)?,
),
1_u8 => UnorderedMapLikeChange::RemoveMany(
DeBin::de_bin(offset, bytes)?,
DeBin::de_bin(offset, bytes)?,
),
2_u8 => UnorderedMapLikeChange::InsertSingle(
DeBin::de_bin(offset, bytes)?,
DeBin::de_bin(offset, bytes)?,
),
3_u8 => UnorderedMapLikeChange::RemoveSingle(DeBin::de_bin(offset, bytes)?),
_ => {
return core::result::Result::Err(nanoserde::DeBinErr {
Expand Down

0 comments on commit 2d4ec09

Please sign in to comment.