Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix s3 prefix path truncations #263

Merged
merged 7 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Fixed bug where `S3Bucket::list_objects` was truncating prefix paths ending with slashes.
- Fixed bug where incorrect credentials model was selected when `MinIOCredentials` was used with `S3Bucket` - [#254](https://github.com/PrefectHQ/prefect-aws/pull/254)

### 0.3.1
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ pip install prefect-aws

### Contributing

Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-gcp/#installation) on how to get started.
Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-aws/#installation) on how to get started.
5 changes: 4 additions & 1 deletion prefect_aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,10 @@ def _join_bucket_folder(self, bucket_path: str = "") -> str:
f"Bucket path {bucket_path!r} is already prefixed with "
f"bucket folder {self.bucket_folder!r}; is this intentional?"
)
return (Path(self.bucket_folder) / bucket_path).as_posix()

return (Path(self.bucket_folder) / bucket_path).as_posix() + (
"" if not os.path.join(bucket_path).endswith(os.sep) else os.sep
tinvaan marked this conversation as resolved.
Show resolved Hide resolved
)

@sync_compatible
async def list_objects(
Expand Down
77 changes: 77 additions & 0 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,25 @@ def object_in_folder(bucket, tmp_path):
return bucket.upload_fileobj(f, "folder/object")


@pytest.fixture
def objects_in_folder(bucket, tmp_path):
objects = []
for filename in [
"folderobject/foo.txt",
"folderobject/bar.txt",
"folder/object/foo.txt",
"folder/object/bar.txt",
]:
file = tmp_path / filename
file.parent.mkdir(parents=True, exist_ok=True)
file.write_text("TEST OBJECTS IN FOLDER")
with open(file, "rb") as f:
filename = Path(filename)
obj = bucket.upload_fileobj(f, (filename.parent / filename.stem).as_posix())
objects.append(obj)
return objects


@pytest.fixture
def a_lot_of_objects(bucket, tmp_path):
objects = []
Expand Down Expand Up @@ -241,6 +260,36 @@ async def test_flow():
assert [object["Key"] for object in objects] == ["folder/object"]


@pytest.mark.parametrize("client_parameters", aws_clients, indirect=True)
async def test_s3_list_objects_prefix_slashes(
object, client_parameters, objects_in_folder, aws_credentials
):
@flow
async def test_flow(slash=False):
return await s3_list_objects(
bucket="bucket",
prefix="folder" + (os.sep if slash else ""),
aws_credentials=aws_credentials,
aws_client_parameters=client_parameters,
)

objects = await test_flow(slash=True)
assert len(objects) == 2
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
]

objects = await test_flow(slash=False)
assert len(objects) == 4
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
"folderobject/bar",
"folderobject/foo",
]


@pytest.mark.parametrize("client_parameters", aws_clients, indirect=True)
async def test_s3_list_objects_filter(
object, client_parameters, object_in_folder, aws_credentials
Expand Down Expand Up @@ -585,6 +634,13 @@ def s3_bucket_with_objects(self, s3_bucket_with_object, object_in_folder):
)
return _s3_bucket_with_objects

@pytest.fixture
def s3_bucket_with_similar_objects(self, s3_bucket_with_objects, objects_in_folder):
_s3_bucket_with_multiple_objects = (
s3_bucket_with_objects # objects in folder will be added
)
return _s3_bucket_with_multiple_objects

def test_credentials_are_correct_type(self, credentials):
s3_bucket = S3Bucket(bucket_name="bucket", credentials=credentials)
assert isinstance(s3_bucket.credentials, type(credentials))
Expand All @@ -605,6 +661,27 @@ def test_list_objects(self, s3_bucket_with_objects, client_parameters):
assert len(objects) == 2
assert [object["Key"] for object in objects] == ["folder/object", "object"]

@pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True)
def test_list_objects_with_params(
self, s3_bucket_with_similar_objects, client_parameters
):
objects = s3_bucket_with_similar_objects.list_objects("folder/object/")
assert len(objects) == 2
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
]

objects = s3_bucket_with_similar_objects.list_objects("folder")
assert len(objects) == 5
assert [object["Key"] for object in objects] == [
"folder/object",
"folder/object/bar",
"folder/object/foo",
"folderobject/bar",
"folderobject/foo",
]

@pytest.mark.parametrize("to_path", [Path("to_path"), "to_path", None])
@pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True)
def test_download_object_to_path(
Expand Down