From 121ae3d6214ead8cec217b259e807382c732bb7e Mon Sep 17 00:00:00 2001 From: Dominic Tarro <57306102+dominictarro@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:43:03 -0500 Subject: [PATCH 1/3] Made S3Bucket._resolve_path Windows-friendly Read/write from Windows OS produced object keys with backslashes and no folder hierarchy. This was due to `pathlib` inferring backslashes on Windows when joining `bucket_path`. Backslashes are frowned upon in the [AWS Object key naming](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html) guidelines. Explicitly made `Path` join `bucket_path` POSIX path string. --- CHANGELOG.md | 2 ++ prefect_aws/s3.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ef801e3..f843f7b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- `S3Bucket._resolve_path` joins basepath and path with '/' instead of system default + ### Deprecated ### Removed diff --git a/prefect_aws/s3.py b/prefect_aws/s3.py index 330428b6..ae4f1aa4 100644 --- a/prefect_aws/s3.py +++ b/prefect_aws/s3.py @@ -391,7 +391,10 @@ def _resolve_path(self, path: str) -> str: bucket_folder = self.bucket_folder or self.basepath # If basepath provided, it means we won't write to the root dir of # the bucket. So we need to add it on the front of the path. - path = str(Path(bucket_folder) / path) if bucket_folder else path + # + # AWS object key naming guidelines require '/' for bucket folders. + # Get POSIX path to prevent `pathlib` from inferring '\' on Windows OS + path = (Path(bucket_folder) / path).as_posix() if bucket_folder else path return path From 684374f0a21c5054a36a6b31506f2d885460c15f Mon Sep 17 00:00:00 2001 From: Dominic Tarro <57306102+dominictarro@users.noreply.github.com> Date: Sat, 7 Jan 2023 15:51:25 -0500 Subject: [PATCH 2/3] Object keys use '/' delimiters instead of system default When object keys are sent to S3 or are shown in logs, it is always in POSIX form. '\' is not recognized as a delimiter on POSIX systems. Paths containing '\' will not be converted to '/' unless passed as a `PureWindowsPath` object. Windows systems always recognize and convert '\' to '/'. --- CHANGELOG.md | 2 +- prefect_aws/s3.py | 18 +++++++++++------- tests/test_s3.py | 21 ++++++++++++++++----- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f843f7b2..80d4c590 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- `S3Bucket._resolve_path` joins basepath and path with '/' instead of system default +- Object keys sent in S3 requests use '/' delimiters instead of system default ### Deprecated diff --git a/prefect_aws/s3.py b/prefect_aws/s3.py index ae4f1aa4..37a90f3b 100644 --- a/prefect_aws/s3.py +++ b/prefect_aws/s3.py @@ -4,7 +4,7 @@ import os import uuid import warnings -from pathlib import Path +from pathlib import Path, PurePath from typing import Any, BinaryIO, Dict, List, Optional, Union from uuid import uuid4 @@ -305,8 +305,8 @@ def cast_pathlib(cls, value): when the S3Bucket block is instantiated. """ - if isinstance(value, Path): - return str(value) + if issubclass(value.__class__, PurePath): + return value.as_posix() return value @validator("basepath", pre=True) @@ -533,7 +533,9 @@ async def put_directory( with open(local_file_path, "rb") as local_file: local_file_content = local_file.read() - await self.write_path(str(remote_file_path), content=local_file_content) + await self.write_path( + remote_file_path.as_posix(), content=local_file_content + ) uploaded_file_count += 1 return uploaded_file_count @@ -674,7 +676,7 @@ def _join_bucket_folder(self, bucket_path: str = "") -> str: f"Bucket path {bucket_path!r} is already prefixed with " f"bucket folder {self.bucket_folder!r}; is this intentional?" ) - return str(Path(self.bucket_folder) / bucket_path) + return (Path(self.bucket_folder) / bucket_path).as_posix() @sync_compatible async def list_objects( @@ -896,7 +898,7 @@ async def download_folder_to_path( to_path = str(to_path) # must be string self.logger.info( f"Downloading object from bucket {self.bucket_name!r} path " - f"{str(bucket_path)!r} to {to_path!r}." + f"{bucket_path.as_posix()!r} to {to_path!r}." ) async_coros.append( run_sync_in_worker_thread( @@ -1054,7 +1056,9 @@ async def upload_from_folder( # `my_folder/notes.txt` will be uploaded if from_path.is_dir(): continue - bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder)) + bucket_path = ( + Path(bucket_folder) / from_path.relative_to(from_folder) + ).as_posix() self.logger.info( f"Uploading from {str(from_path)!r} to the bucket " f"{self.bucket_name!r} path {bucket_path!r}." diff --git a/tests/test_s3.py b/tests/test_s3.py index fb618ac3..5e5927a8 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -1,6 +1,6 @@ import io import os -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath import boto3 import pytest @@ -363,20 +363,31 @@ async def test_read_fails_does_not_exist(s3_bucket): await s3_bucket.read_path("test_bucket/foo/bar") -async def test_aws_basepath(s3_bucket, aws_creds_block): - +@pytest.mark.parametrize("type_", [PureWindowsPath, PurePosixPath, str]) +@pytest.mark.parametrize("delimiter", ["\\", "/"]) +async def test_aws_basepath(s3_bucket, aws_creds_block, delimiter, type_): """Test the basepath functionality.""" # create a new block with a subfolder s3_bucket_block = S3Bucket( bucket_name=BUCKET_NAME, aws_credentials=aws_creds_block, - basepath="subfolder", + basepath=type_(f"subfolder{delimiter}subsubfolder"), ) key = await s3_bucket_block.write_path("test.txt", content=b"hello") assert await s3_bucket_block.read_path("test.txt") == b"hello" - assert key == "subfolder/test.txt" + + expected: str = "subfolder/subsubfolder/test.txt" + if ( + delimiter == "\\" + and type_ != PureWindowsPath + and (os.sep != "\\" and os.altsep != "\\") + ): + # In this case, \\ will not be recognized as a delimiter + # This case is triggered on POSIX systems + expected = "subfolder\\subsubfolder/test.txt" + assert key == expected async def test_get_directory( From a08d523d67f98c94f82a53cef38a08ce1a56d13e Mon Sep 17 00:00:00 2001 From: Dominic Tarro <57306102+dominictarro@users.noreply.github.com> Date: Wed, 18 Jan 2023 10:31:17 -0500 Subject: [PATCH 3/3] Update CHANGELOG.md Co-authored-by: Andrew <15331990+ahuang11@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80d4c590..f00cc02e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Object keys sent in S3 requests use '/' delimiters instead of system default +- Object keys sent in S3 requests use '/' delimiters instead of system default - [#192](https://github.com/PrefectHQ/prefect-aws/pull/192) ### Deprecated