Skip to content

Commit

Permalink
Treat ListingTableUrl as URL-encoded (apache#8009)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Oct 31, 2023
1 parent 656c6a9 commit d70b21a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 11 deletions.
1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ num_cpus = "1.13.0"
object_store = "0.7.0"
parking_lot = "0.12"
parquet = { workspace = true, optional = true }
percent-encoding = "2.2.0"
pin-project-lite = "^0.2.7"
rand = "0.8"
sqlparser = { workspace = true }
Expand Down
37 changes: 27 additions & 10 deletions datafusion/core/src/datasource/listing/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use itertools::Itertools;
use log::debug;
use object_store::path::Path;
use object_store::{ObjectMeta, ObjectStore};
use percent_encoding;
use std::sync::Arc;
use url::Url;

Expand Down Expand Up @@ -86,7 +85,7 @@ impl ListingTableUrl {
}

match Url::parse(s) {
Ok(url) => Ok(Self::new(url, None)),
Ok(url) => Self::try_new(url, None),
Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s),
Err(e) => Err(DataFusionError::External(Box::new(e))),
}
Expand Down Expand Up @@ -138,15 +137,13 @@ impl ListingTableUrl {
.map_err(|_| DataFusionError::Internal(format!("Can not open path: {s}")))?;
// TODO: Currently we do not have an IO-related error variant that accepts ()
// or a string. Once we have such a variant, change the error type above.
Ok(Self::new(url, glob))
Self::try_new(url, glob)
}

/// Creates a new [`ListingTableUrl`] from a url and optional glob expression
fn new(url: Url, glob: Option<Pattern>) -> Self {
let decoded_path =
percent_encoding::percent_decode_str(url.path()).decode_utf8_lossy();
let prefix = Path::from(decoded_path.as_ref());
Self { url, prefix, glob }
fn try_new(url: Url, glob: Option<Pattern>) -> Result<Self> {
let prefix = Path::from_url_path(url.path())?;
Ok(Self { url, prefix, glob })
}

/// Returns the URL scheme
Expand Down Expand Up @@ -286,6 +283,7 @@ fn split_glob_expression(path: &str) -> Option<(&str, &str)> {
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;

#[test]
fn test_prefix_path() {
Expand Down Expand Up @@ -317,8 +315,27 @@ mod tests {
let url = ListingTableUrl::parse("file:///foo/bar?").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar");

let url = ListingTableUrl::parse("file:///foo/😺").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/%F0%9F%98%BA");
let err = ListingTableUrl::parse("file:///foo/😺").unwrap_err();
assert_eq!(err.to_string(), "Object Store error: Encountered object with invalid path: Error parsing Path \"/foo/😺\": Encountered illegal character sequence \"😺\" whilst parsing path segment \"😺\"");

let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar.foo");

let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar.foo");

let url = ListingTableUrl::parse("file:///foo/bar%252Ffoo").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/bar%2Ffoo");

let url = ListingTableUrl::parse("file:///foo/a%252Fb.txt").unwrap();
assert_eq!(url.prefix.as_ref(), "foo/a%2Fb.txt");

let dir = tempdir().unwrap();
let path = dir.path().join("bar%2Ffoo");
std::fs::File::create(&path).unwrap();

let url = ListingTableUrl::parse(path.to_str().unwrap()).unwrap();
assert!(url.prefix.as_ref().ends_with("bar%2Ffoo"), "{}", url.prefix);
}

#[test]
Expand Down

0 comments on commit d70b21a

Please sign in to comment.