Skip to content

Commit

Permalink
Avoid allocations
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Dec 9, 2022
1 parent 616342d commit 5d73cc4
Showing 1 changed file with 24 additions and 54 deletions.
78 changes: 24 additions & 54 deletions arrow-string/src/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,75 +588,45 @@ fn ilike_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor<Item = &'a str>>(
right: &str,
op: F,
) -> Result<BooleanArray, ArrowError> {
if !right.contains(is_like_pattern) {
// fast path, can use equals
if right.is_ascii() {
// This avoids additional allocations
Ok(BooleanArray::from_unary(left, |item| {
// If not ASCII faster to use case insensitive regex than allocating using to_uppercase
if right.is_ascii() {
if !right.contains(is_like_pattern) {
return Ok(BooleanArray::from_unary(left, |item| {
op(item.eq_ignore_ascii_case(right))
}))
} else {
let right_uppercase = right.to_uppercase();
Ok(BooleanArray::from_unary(left, |item| {
op(item.to_uppercase() == right_uppercase)
}))
}
} else if right.ends_with('%')
&& !right.ends_with("\\%")
&& !right[..right.len() - 1].contains(is_like_pattern)
{
// fast path, can use starts_with
if right.is_ascii() {
}));
} else if right.ends_with('%')
&& !right.ends_with("\\%")
&& !right[..right.len() - 1].contains(is_like_pattern)
{
// fast path, can use starts_with
let start_str = &right[..right.len() - 1];
Ok(BooleanArray::from_unary(left, |item| {
return Ok(BooleanArray::from_unary(left, |item| {
let end = item.len().min(start_str.len());
let result = item.is_char_boundary(end)
&& start_str.eq_ignore_ascii_case(&item[..end]);
op(result)
}))
} else {
let start_str = &right[..right.len() - 1].to_uppercase();
Ok(BooleanArray::from_unary(left, |item| {
op(item.to_uppercase().starts_with(start_str))
}))
}
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
// fast path, can use ends_with
if right.is_ascii() {
}));
} else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
// fast path, can use ends_with
let ends_str = &right[1..];
Ok(BooleanArray::from_unary(left, |item| {
let start = item.len().saturating_sub(ends_str.len());
let result = item.is_char_boundary(start)
&& ends_str.eq_ignore_ascii_case(&item[start..]);
op(result)
}))
} else {
let ends_str = &right[1..].to_uppercase();
Ok(BooleanArray::from_unary(left, |item| {
op(item.to_uppercase().ends_with(ends_str))
}))
}
} else if right.starts_with('%')
&& right.ends_with('%')
&& !right.ends_with("\\%")
&& !right[1..right.len() - 1].contains(is_like_pattern)
{
// fast path, can use contains
let contains = &right[1..right.len() - 1].to_uppercase();
Ok(BooleanArray::from_unary(left, |item| {
op(item.to_uppercase().contains(contains))
}))
} else {
let re_pattern = replace_like_wildcards(right)?;
let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from ILIKE pattern: {}",
e
))
})?;

Ok(BooleanArray::from_unary(left, |item| op(re.is_match(item))))
}

let re_pattern = replace_like_wildcards(right)?;
let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
ArrowError::ComputeError(format!(
"Unable to build regex from ILIKE pattern: {}",
e
))
})?;

Ok(BooleanArray::from_unary(left, |item| op(re.is_match(item))))
}

#[inline]
Expand Down

0 comments on commit 5d73cc4

Please sign in to comment.