Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Made S3Bucket._resolve_path Windows-friendly #192

Merged
merged 3 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Object keys sent in S3 requests use '/' delimiters instead of system default
dominictarro marked this conversation as resolved.
Show resolved Hide resolved

### Deprecated

### Removed
Expand Down
23 changes: 15 additions & 8 deletions prefect_aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import uuid
import warnings
from pathlib import Path
from pathlib import Path, PurePath
from typing import Any, BinaryIO, Dict, List, Optional, Union
from uuid import uuid4

Expand Down Expand Up @@ -305,8 +305,8 @@ def cast_pathlib(cls, value):
when the S3Bucket block is instantiated.
"""

if isinstance(value, Path):
return str(value)
if issubclass(value.__class__, PurePath):
return value.as_posix()
return value

@validator("basepath", pre=True)
Expand Down Expand Up @@ -391,7 +391,10 @@ def _resolve_path(self, path: str) -> str:
bucket_folder = self.bucket_folder or self.basepath
# If basepath provided, it means we won't write to the root dir of
# the bucket. So we need to add it on the front of the path.
path = str(Path(bucket_folder) / path) if bucket_folder else path
#
# AWS object key naming guidelines require '/' for bucket folders.
# Get POSIX path to prevent `pathlib` from inferring '\' on Windows OS
path = (Path(bucket_folder) / path).as_posix() if bucket_folder else path

return path

Expand Down Expand Up @@ -530,7 +533,9 @@ async def put_directory(
with open(local_file_path, "rb") as local_file:
local_file_content = local_file.read()

await self.write_path(str(remote_file_path), content=local_file_content)
await self.write_path(
remote_file_path.as_posix(), content=local_file_content
)
uploaded_file_count += 1

return uploaded_file_count
Expand Down Expand Up @@ -671,7 +676,7 @@ def _join_bucket_folder(self, bucket_path: str = "") -> str:
f"Bucket path {bucket_path!r} is already prefixed with "
f"bucket folder {self.bucket_folder!r}; is this intentional?"
)
return str(Path(self.bucket_folder) / bucket_path)
return (Path(self.bucket_folder) / bucket_path).as_posix()

@sync_compatible
async def list_objects(
Expand Down Expand Up @@ -893,7 +898,7 @@ async def download_folder_to_path(
to_path = str(to_path) # must be string
self.logger.info(
f"Downloading object from bucket {self.bucket_name!r} path "
f"{str(bucket_path)!r} to {to_path!r}."
f"{bucket_path.as_posix()!r} to {to_path!r}."
)
async_coros.append(
run_sync_in_worker_thread(
Expand Down Expand Up @@ -1051,7 +1056,9 @@ async def upload_from_folder(
# `my_folder/notes.txt` will be uploaded
if from_path.is_dir():
continue
bucket_path = str(Path(bucket_folder) / from_path.relative_to(from_folder))
bucket_path = (
Path(bucket_folder) / from_path.relative_to(from_folder)
).as_posix()
self.logger.info(
f"Uploading from {str(from_path)!r} to the bucket "
f"{self.bucket_name!r} path {bucket_path!r}."
Expand Down
21 changes: 16 additions & 5 deletions tests/test_s3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import io
import os
from pathlib import Path
from pathlib import Path, PurePosixPath, PureWindowsPath

import boto3
import pytest
Expand Down Expand Up @@ -363,20 +363,31 @@ async def test_read_fails_does_not_exist(s3_bucket):
await s3_bucket.read_path("test_bucket/foo/bar")


async def test_aws_basepath(s3_bucket, aws_creds_block):

@pytest.mark.parametrize("type_", [PureWindowsPath, PurePosixPath, str])
@pytest.mark.parametrize("delimiter", ["\\", "/"])
async def test_aws_basepath(s3_bucket, aws_creds_block, delimiter, type_):
"""Test the basepath functionality."""

# create a new block with a subfolder
s3_bucket_block = S3Bucket(
bucket_name=BUCKET_NAME,
aws_credentials=aws_creds_block,
basepath="subfolder",
basepath=type_(f"subfolder{delimiter}subsubfolder"),
)

key = await s3_bucket_block.write_path("test.txt", content=b"hello")
assert await s3_bucket_block.read_path("test.txt") == b"hello"
assert key == "subfolder/test.txt"

expected: str = "subfolder/subsubfolder/test.txt"
if (
delimiter == "\\"
and type_ != PureWindowsPath
and (os.sep != "\\" and os.altsep != "\\")
):
# In this case, \\ will not be recognized as a delimiter
# This case is triggered on POSIX systems
expected = "subfolder\\subsubfolder/test.txt"
assert key == expected


async def test_get_directory(
Expand Down