Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix s3 prefix path truncations #263

Merged
merged 7 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Fixed bug where `S3Bucket::list_objects` was truncating prefix paths ending with slashes.
- Fixed bug where incorrect credentials model was selected when `MinIOCredentials` was used with `S3Bucket` - [#254](https://github.com/PrefectHQ/prefect-aws/pull/254)

### 0.3.1
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ pip install prefect-aws

### Contributing

Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-gcp/#installation) on how to get started.
Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-aws/#installation) on how to get started.
5 changes: 4 additions & 1 deletion prefect_aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,10 @@ def _join_bucket_folder(self, bucket_path: str = "") -> str:
f"Bucket path {bucket_path!r} is already prefixed with "
f"bucket folder {self.bucket_folder!r}; is this intentional?"
)
return (Path(self.bucket_folder) / bucket_path).as_posix()

return (Path(self.bucket_folder) / bucket_path).as_posix() + (
"" if not bucket_path.endswith("/") else "/"
)

@sync_compatible
async def list_objects(
Expand Down
77 changes: 77 additions & 0 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,25 @@ def object_in_folder(bucket, tmp_path):
return bucket.upload_fileobj(f, "folder/object")


@pytest.fixture
def objects_in_folder(bucket, tmp_path):
objects = []
for filename in [
"folderobject/foo.txt",
"folderobject/bar.txt",
"folder/object/foo.txt",
"folder/object/bar.txt",
]:
file = tmp_path / filename
file.parent.mkdir(parents=True, exist_ok=True)
file.write_text("TEST OBJECTS IN FOLDER")
with open(file, "rb") as f:
filename = Path(filename)
obj = bucket.upload_fileobj(f, (filename.parent / filename.stem).as_posix())
objects.append(obj)
return objects


@pytest.fixture
def a_lot_of_objects(bucket, tmp_path):
objects = []
Expand Down Expand Up @@ -241,6 +260,36 @@ async def test_flow():
assert [object["Key"] for object in objects] == ["folder/object"]


@pytest.mark.parametrize("client_parameters", aws_clients, indirect=True)
async def test_s3_list_objects_prefix_slashes(
object, client_parameters, objects_in_folder, aws_credentials
):
@flow
async def test_flow(slash=False):
return await s3_list_objects(
bucket="bucket",
prefix="folder" + ("/" if slash else ""),
aws_credentials=aws_credentials,
aws_client_parameters=client_parameters,
)

objects = await test_flow(slash=True)
assert len(objects) == 2
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
]

objects = await test_flow(slash=False)
assert len(objects) == 4
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
"folderobject/bar",
"folderobject/foo",
]


@pytest.mark.parametrize("client_parameters", aws_clients, indirect=True)
async def test_s3_list_objects_filter(
object, client_parameters, object_in_folder, aws_credentials
Expand Down Expand Up @@ -585,6 +634,13 @@ def s3_bucket_with_objects(self, s3_bucket_with_object, object_in_folder):
)
return _s3_bucket_with_objects

@pytest.fixture
def s3_bucket_with_similar_objects(self, s3_bucket_with_objects, objects_in_folder):
_s3_bucket_with_multiple_objects = (
s3_bucket_with_objects # objects in folder will be added
)
return _s3_bucket_with_multiple_objects

def test_credentials_are_correct_type(self, credentials):
s3_bucket = S3Bucket(bucket_name="bucket", credentials=credentials)
assert isinstance(s3_bucket.credentials, type(credentials))
Expand All @@ -605,6 +661,27 @@ def test_list_objects(self, s3_bucket_with_objects, client_parameters):
assert len(objects) == 2
assert [object["Key"] for object in objects] == ["folder/object", "object"]

@pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True)
def test_list_objects_with_params(
self, s3_bucket_with_similar_objects, client_parameters
):
objects = s3_bucket_with_similar_objects.list_objects("folder/object/")
assert len(objects) == 2
assert [object["Key"] for object in objects] == [
"folder/object/bar",
"folder/object/foo",
]

objects = s3_bucket_with_similar_objects.list_objects("folder")
assert len(objects) == 5
assert [object["Key"] for object in objects] == [
"folder/object",
"folder/object/bar",
"folder/object/foo",
"folderobject/bar",
"folderobject/foo",
]

@pytest.mark.parametrize("to_path", [Path("to_path"), "to_path", None])
@pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True)
def test_download_object_to_path(
Expand Down