Skip to content

Commit

Permalink
only boost similarity in Jaro-Winkler once the Jaro similarity exceeds 0.7
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Dec 31, 2023
1 parent d91d06d commit 5b1fe08
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ This project attempts to adhere to [Semantic Versioning](http://semver.org).

## [Unreleased]

### Changed

- Only boost similarity in Jaro-Winkler once the Jaro similarity exceeds 0.7.

### Fixed

- Fix transposition counting in Jaro and Jaro-Winkler.
Expand Down
22 changes: 13 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,16 +177,20 @@ where
&'b Iter2: IntoIterator<Item = Elem2>,
Elem1: PartialEq<Elem2>,
{
let jaro_distance = generic_jaro(a, b);
let sim = generic_jaro(a, b);

let prefix_length = a
.into_iter()
.take(4)
.zip(b.into_iter())
.take_while(|(a_elem, b_elem)| a_elem == b_elem)
.count();
if sim > 0.7 {
let prefix_length = a
.into_iter()
.take(4)
.zip(b.into_iter())
.take_while(|(a_elem, b_elem)| a_elem == b_elem)
.count();

return jaro_distance + 0.1 * prefix_length as f64 * (1.0 - jaro_distance);
sim + 0.1 * prefix_length as f64 * (1.0 - sim)
} else {
sim
}
}

/// Like Jaro but gives a boost to strings that have a common prefix
/// (up to 4 elements); the boost is only applied once the Jaro similarity exceeds 0.7.
Expand Down Expand Up @@ -668,7 +672,7 @@ mod tests {

#[test]
fn jaro_winkler_names() {
assert!((0.562 - jaro_winkler("Friedrich Nietzsche", "Fran-Paul Sartre")).abs() < 0.001);
assert!((0.452 - jaro_winkler("Friedrich Nietzsche", "Fran-Paul Sartre")).abs() < 0.001);
}

#[test]
Expand Down

0 comments on commit 5b1fe08

Please sign in to comment.