Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file:// to self links that don't have a protocol #1472

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions pystac/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@

from pystac import MediaType, STACError, common_metadata, utils
from pystac.html.jinja_env import get_jinja_env
from pystac.utils import is_absolute_href, make_absolute_href, make_relative_href
from pystac.utils import (
is_absolute_href,
make_absolute_href,
make_relative_href,
safe_urlparse,
)

if TYPE_CHECKING:
from pystac.common_metadata import CommonMetadata
Expand Down Expand Up @@ -380,7 +385,7 @@ def get_self_href(self) -> str | None:

def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> str:
if utils.is_absolute_href(href):
return href
return safe_urlparse(href).path
else:
item_self = owner.get_self_href() if owner else None
if item_self is None:
Expand All @@ -389,4 +394,4 @@ def _absolute_href(href: str, owner: Assets | None, action: str = "access") -> s
"and owner item is not set. Hint: try using "
":func:`~pystac.Item.make_asset_hrefs_absolute`"
)
return utils.make_absolute_href(href, item_self)
return safe_urlparse(utils.make_absolute_href(href, item_self)).path
2 changes: 1 addition & 1 deletion pystac/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __init__(
self.rel = rel
if isinstance(target, str):
if rel == pystac.RelType.SELF:
self._target_href = make_absolute_href(target)
self._target_href = make_absolute_href(target, must_include_scheme=True)
else:
self._target_href = make_posix_style(target)
self._target_object = None
Expand Down
5 changes: 3 additions & 2 deletions pystac/stac_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def read_text_from_href(self, href: str) -> str:
except HTTPError as e:
raise Exception(f"Could not read uri {href}") from e
else:
href = safe_urlparse(href).path
with open(href, encoding="utf-8") as f:
href_contents = f.read()
return href_contents
Expand All @@ -328,7 +329,7 @@ def write_text_to_href(self, href: str, txt: str) -> None:
"""
if _is_url(href):
raise NotImplementedError("DefaultStacIO cannot write to urls")
href = os.fspath(href)
href = safe_urlparse(href).path
dirname = os.path.dirname(href)
if dirname != "" and not os.path.isdir(dirname):
os.makedirs(dirname)
Expand Down Expand Up @@ -391,7 +392,7 @@ def _report_duplicate_object_names(

def _is_url(href: str) -> bool:
parsed = safe_urlparse(href)
return parsed.scheme != ""
return parsed.scheme != "" and parsed.scheme != "file"


if HAS_URLLIB3:
Expand Down
62 changes: 43 additions & 19 deletions pystac/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,34 +288,54 @@ def _make_absolute_href_path(
parsed_source: URLParseResult,
parsed_start: URLParseResult,
start_is_dir: bool = False,
must_include_scheme: bool = False,
) -> str:
# If the source is already absolute, just return it
# If the source is already absolute and doesn't need a scheme just return it
if os.path.isabs(parsed_source.path):
return urlunparse(parsed_source)

# If the start path is not a directory, get the parent directory
start_dir = (
parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path)
)
if must_include_scheme:
abs_path = parsed_source.path
else:
return urlunparse(parsed_source)
else:
# If the start path is not a directory, get the parent directory
start_dir = (
parsed_start.path if start_is_dir else os.path.dirname(parsed_start.path)
)

# Join the start directory to the relative path and find the absolute path
abs_path = make_posix_style(
os.path.abspath(os.path.join(start_dir, parsed_source.path))
)
# Join the start directory to the relative path and find the absolute path
abs_path = make_posix_style(
os.path.abspath(os.path.join(start_dir, parsed_source.path))
)

# Account for the normalization of abspath for
# things like /vsitar// prefixes by replacing the
# original start_dir text when abspath modifies the start_dir.
if not start_dir == make_posix_style(os.path.abspath(start_dir)):
abs_path = abs_path.replace(
make_posix_style(os.path.abspath(start_dir)), start_dir
# Account for the normalization of abspath for
# things like /vsitar// prefixes by replacing the
# original start_dir text when abspath modifies the start_dir.
if not start_dir == make_posix_style(os.path.abspath(start_dir)):
abs_path = abs_path.replace(
make_posix_style(os.path.abspath(start_dir)), start_dir
)

# add a scheme if there isn't one already
if must_include_scheme:
return urlunparse(
(
"file",
parsed_start.netloc,
abs_path,
parsed_source.params,
parsed_source.query,
parsed_source.fragment,
)
)

return abs_path


def make_absolute_href(
source_href: str, start_href: str | None = None, start_is_dir: bool = False
source_href: str,
start_href: str | None = None,
start_is_dir: bool = False,
must_include_scheme: bool = False,
) -> str:
"""Returns a new string that represents ``source_href`` as an absolute path. If
``source_href`` is already absolute it is returned unchanged. If ``source_href``
Expand All @@ -332,6 +352,8 @@ def make_absolute_href(
start_is_dir : If ``True``, ``start_href`` is treated as a directory.
Otherwise, ``start_href`` is considered to be a path to a file. Defaults to
``False``.
must_include_scheme : If ``True``, every output will have a scheme. This means
that local filepaths will be prefixed with `file://`. Defaults to ``False``.

Returns:
str: The absolute HREF.
Expand All @@ -349,7 +371,9 @@ def make_absolute_href(
if parsed_source.scheme != "" or parsed_start.scheme != "":
return _make_absolute_href_url(parsed_source, parsed_start, start_is_dir)
else:
return _make_absolute_href_path(parsed_source, parsed_start, start_is_dir)
return _make_absolute_href_path(
parsed_source, parsed_start, start_is_dir, must_include_scheme
)


def is_absolute_href(href: str) -> bool:
Expand Down
18 changes: 9 additions & 9 deletions tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def test_alter_asset_absolute_path(
assert asset.get_absolute_href() == new_href
assert os.path.exists(new_href)
if action == "move":
assert not os.path.exists(old_href)
assert not os.path.exists(old_href.replace("file://", ""))
elif action == "copy":
assert os.path.exists(old_href)
assert os.path.exists(old_href.replace("file://", ""))


@pytest.mark.parametrize("action", ["copy", "move"])
Expand All @@ -38,11 +38,11 @@ def test_alter_asset_relative_path(action: str, tmp_asset: pystac.Asset) -> None
assert asset.href == new_href
href = asset.get_absolute_href()
assert href is not None
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))
if action == "move":
assert not os.path.exists(old_href)
assert not os.path.exists(old_href.replace("file://", ""))
elif action == "copy":
assert os.path.exists(old_href)
assert os.path.exists(old_href.replace("file://", ""))


@pytest.mark.parametrize("action", ["copy", "move"])
Expand Down Expand Up @@ -82,23 +82,23 @@ def test_delete_asset(tmp_asset: pystac.Asset) -> None:
asset = tmp_asset
href = asset.get_absolute_href()
assert href is not None
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))

asset.delete()

assert not os.path.exists(href)
assert not os.path.exists(href.replace("file://", ""))


def test_delete_asset_relative_no_owner_fails(tmp_asset: pystac.Asset) -> None:
asset = tmp_asset
href = asset.get_absolute_href()
assert href is not None
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))

asset.owner = None

with pytest.raises(ValueError, match="Cannot delete file") as e:
asset.delete()

assert asset.href in str(e.value)
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))
2 changes: 1 addition & 1 deletion tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ def test_save_with_different_stac_io(self) -> None:

assert len(hrefs) == stac_io.mock.write_text.call_count
for call_args_list in stac_io.mock.write_text.call_args_list:
assert call_args_list[0][0] in hrefs
assert f"file://{call_args_list[0][0]}" in hrefs

def test_subcatalogs_saved_to_correct_path(self) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ def test_delete_asset_relative_no_self_link_fails(

assert asset.href in str(e.value)
assert name in collection.assets
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))


def test_permissive_temporal_extent_deserialization(collection: Collection) -> None:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ def test_asset_absolute_href(self) -> None:
item.set_self_href(item_path)
rel_asset = Asset("./data.geojson")
rel_asset.set_owner(item)
expected_href = make_posix_style(
expected_filepath = make_posix_style(
os.path.abspath(os.path.join(os.path.dirname(item_path), "./data.geojson"))
)
actual_href = rel_asset.get_absolute_href()
self.assertEqual(expected_href, actual_href)
self.assertEqual(f"file://{expected_filepath}", actual_href)

def test_asset_absolute_href_no_item_self(self) -> None:
item_dict = self.get_example_item_dict()
Expand Down Expand Up @@ -612,7 +612,7 @@ def test_delete_asset_relative_no_self_link_fails(tmp_asset: pystac.Asset) -> No

assert asset.href in str(e.value)
assert name in item.assets
assert os.path.exists(href)
assert os.path.exists(href.replace("file://", ""))


def test_resolve_collection_with_root(
Expand Down
6 changes: 3 additions & 3 deletions tests/test_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def test_catalog(self) -> None:
href = self.strategy.get_href(
cat, parent_dir="https://example.com", is_root=True
)
self.assertEqual(href, "/an/href")
self.assertEqual(href, "file:///an/href")

def test_collection(self) -> None:
collection = TestCases.case_8()
Expand All @@ -434,7 +434,7 @@ def test_collection(self) -> None:
href = self.strategy.get_href(
collection, parent_dir="https://example.com", is_root=True
)
self.assertEqual(href, "/an/href")
self.assertEqual(href, "file:///an/href")

def test_item(self) -> None:
collection = TestCases.case_8()
Expand All @@ -444,7 +444,7 @@ def test_item(self) -> None:
self.strategy.get_href(item, parent_dir="http://example.com")
item.set_self_href("/an/href")
href = self.strategy.get_href(item, parent_dir="http://example.com")
self.assertEqual(href, "/an/href")
self.assertEqual(href, "file:///an/href")


class APILayoutStrategyTest(unittest.TestCase):
Expand Down
16 changes: 12 additions & 4 deletions tests/test_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ def test_resolved_self_href(self) -> None:
link = catalog.get_single_link(pystac.RelType.SELF)
assert link
link.resolve_stac_object()
self.assertEqual(link.get_absolute_href(), make_posix_style(path))
self.assertEqual(
link.get_absolute_href(), f"file://{make_posix_style(path)}"
)

def test_target_getter_setter(self) -> None:
link = pystac.Link("my rel", target="./foo/bar.json")
Expand Down Expand Up @@ -140,7 +142,10 @@ def test_relative_self_href(self) -> None:
item = pystac.read_file("item.json")
href = item.get_self_href()
assert href
self.assertTrue(os.path.isabs(href), f"Not an absolute path: {href}")
self.assertTrue(
os.path.isabs(href.replace("file://", "")),
f"Not an absolute path: {href}",
)
finally:
os.chdir(previous)

Expand Down Expand Up @@ -237,7 +242,10 @@ def test_from_dict_round_trip(self) -> None:
d2 = pystac.Link.from_dict(d).to_dict()
self.assertEqual(d, d2)
d = {"rel": "self", "href": "t"}
d2 = {"rel": "self", "href": make_posix_style(os.path.join(os.getcwd(), "t"))}
d2 = {
"rel": "self",
"href": f"file://{make_posix_style(os.path.join(os.getcwd(), 't'))}",
}
self.assertEqual(pystac.Link.from_dict(d).to_dict(), d2)

def test_from_dict_failures(self) -> None:
Expand Down Expand Up @@ -333,7 +341,7 @@ def test_relative_self_link(tmp_path: Path) -> None:
assert read_item
asset_href = read_item.assets["data"].get_absolute_href()
assert asset_href
assert Path(asset_href).exists()
assert Path(asset_href.replace("file://", "")).exists()


@pytest.mark.parametrize("rel", HIERARCHICAL_LINKS)
Expand Down
4 changes: 3 additions & 1 deletion tests/validation/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def test_validate_all_dict(self, test_case: pystac.Catalog) -> None:
dst_dir = os.path.join(tmp_dir, "catalog")
# Copy test case 7 to the temporary directory
catalog_href = get_opt(TestCases.case_7().get_self_href())
shutil.copytree(os.path.dirname(catalog_href), dst_dir)
shutil.copytree(
os.path.dirname(catalog_href.replace("file://", "")), dst_dir
)

new_cat_href = os.path.join(dst_dir, "catalog.json")

Expand Down
Loading