Skip to content

Commit

Permalink
Merge pull request #874 from linsword13/deploy-fix
Browse files Browse the repository at this point in the history
Fix deployment pull file path issue
  • Loading branch information
douglasjacobsen authored Feb 11, 2025
2 parents a9ac42d + cff7059 commit c095dfa
Showing 1 changed file with 25 additions and 184 deletions.
209 changes: 25 additions & 184 deletions lib/ramble/spack/util/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
Utility functions for parsing, formatting, and manipulating URLs.
"""

import itertools
import posixpath
import re
import sys

Expand All @@ -23,26 +21,6 @@
is_windows = sys.platform == 'win32'


def _split_all(path):
"""Split path into its atomic components.
Returns the shortest list, L, of strings such that posixpath.join(*L) ==
path and posixpath.split(element) == ('', element) for every element in L
except possibly the first. This first element may possibly have the value
of '/'.
"""
result = []
a = path
old_a = None
while a != old_a:
(old_a, (a, b)) = a, posixpath.split(a)

if a or b:
result.insert(0, b or '/')

return result


def local_file_path(url):
"""Get a local file path from a url.
Expand Down Expand Up @@ -124,168 +102,31 @@ def format(parsed_url):
return parsed_url.geturl()


def join(base_url, path, *extra, **kwargs):
    """Joins a base URL with one or more local URL path components.

    If ``resolve_href`` is True, treat the base URL as though it were the
    locator of a web page, and the remaining URL path components as though
    they formed a relative URL to be resolved against it (i.e.: as in
    ``posixpath.join(...)``). The result is an absolute URL to the resource
    to which a user's browser would navigate if they clicked on a link with
    an "href" attribute equal to the relative URL.

    If ``resolve_href`` is False (default), then the URL path components are
    joined as in ``posixpath.join()``.

    Note: ``file://`` URL path components are not canonicalized as part of
    this operation.  To canonicalize, pass the joined url to ``format()``.

    Examples:
        base_url = 's3://bucket/index.html'
        link = '../other-bucket/document.txt'

        # wrong - link is a local URL that needs to be resolved against base_url
        join(base_url, link)                      # 's3://bucket/other_bucket/document.txt'
        # correct - resolve local URL against base_url
        join(base_url, link, resolve_href=True)   # 's3://other_bucket/document.txt'

        prefix = 'https://mirror.spack.io/build_cache'
        # wrong - prefix is just a URL prefix
        join(prefix, 'my-package', resolve_href=True)   # 'https://mirror.spack.io/my-package'
        # correct - simply append additional URL path components
        join(prefix, 'my-package', resolve_href=False)  # '.../build_cache/my-package'

        # For canonicalizing file:// URLs, take care to explicitly
        # differentiate between absolute and relative join components:
        join('/a/b/c', '$spack')     # 'file:///a/b/c/$spack'   ($spack is relative)
        join('/a/b/c', '/$spack')    # 'file:///$spack'         (/$spack is absolute)
    """
    # Normalize every argument to a string; non-strings are assumed to be
    # urllib parse results and are serialized via geturl().
    paths = [
        (x) if isinstance(x, str)
        else x.geturl()
        for x in itertools.chain((base_url, path), extra)]

    paths = [convert_to_posix_path(x) for x in paths]
    n = len(paths)
    last_abs_component = None
    scheme = ''
    # Scan right-to-left for the last absolute component: everything before
    # it is irrelevant to the join result.
    for i in range(n - 1, -1, -1):
        obj = urllib.parse.urlparse(
            paths[i], scheme='', allow_fragments=False)

        scheme = obj.scheme

        # in either case the component is absolute
        if scheme or obj.path.startswith('/'):
            if not scheme:
                # Without a scheme, we have to go back looking for the
                # next-last component that specifies a scheme.
                for j in range(i - 1, -1, -1):
                    obj = urllib.parse.urlparse(
                        paths[j], scheme='', allow_fragments=False)

                    if obj.scheme:
                        # Re-attach the borrowed scheme (and netloc, except
                        # for s3 where the bucket comes from the path) to the
                        # absolute path component.
                        paths[i] = '{SM}://{NL}{PATH}'.format(
                            SM=obj.scheme,
                            NL=(
                                (obj.netloc + '/')
                                if obj.scheme != 's3' else ''),
                            PATH=paths[i][1:])
                        break

            last_abs_component = i
            break

    if last_abs_component is not None:
        # Drop everything left of the last absolute component.
        paths = paths[last_abs_component:]
        if len(paths) == 1:
            result = urllib.parse.urlparse(
                paths[0], scheme='file', allow_fragments=False)

            # another subtlety: If the last argument to join() is an absolute
            # file:// URL component with a relative path, the relative path
            # needs to be resolved.
            if result.scheme == 'file' and result.netloc:
                result = urllib.parse.ParseResult(
                    scheme=result.scheme,
                    netloc='',
                    path=posixpath.abspath(result.netloc + result.path),
                    params=result.params,
                    query=result.query,
                    fragment=None)

            return result.geturl()

    # More than one surviving component: delegate the actual path joining.
    return _join(*paths, **kwargs)


def _join(base_url, path, *extra, **kwargs):
    """Join ``path`` (and any ``extra`` components) onto ``base_url``.

    Implements the actual path arithmetic for ``join()``.  Honors the
    ``resolve_href`` keyword: when True, the last path segment of the base
    URL is dropped first (browser-style href resolution); when False
    (default), components are appended posixpath.join-style.
    """
    base_url = parse(base_url)
    resolve_href = kwargs.get('resolve_href', False)

    (scheme, netloc, base_path, params, query, _) = base_url
    scheme = scheme.lower()

    # Flatten path + extra into individual, non-empty path segments.
    path_tokens = [
        part for part in itertools.chain(
            _split_all(path),
            itertools.chain.from_iterable(
                _split_all(extra_path) for extra_path in extra))
        if part and part != '/']

    # '/fake-root' anchors the join so relpath() below can strip it again,
    # yielding a path relative to the URL root.
    base_path_args = ['/fake-root']
    if scheme == 's3':
        # For s3, the bucket (netloc) participates in path joining.
        if netloc:
            base_path_args.append(netloc)

        if base_path.startswith('/'):
            base_path = base_path[1:]

    base_path_args.append(base_path)

    if resolve_href:
        # Browser-style resolution: discard the last segment of the base
        # path before appending the new components.
        new_base_path, _ = posixpath.split(posixpath.join(*base_path_args))
        base_path_args = [new_base_path]

    base_path_args.extend(path_tokens)
    base_path = posixpath.relpath(posixpath.join(*base_path_args), '/fake-root')

    if scheme == 's3':
        # The first resulting segment becomes the bucket (netloc) again.
        path_tokens = [
            part for part in _split_all(base_path)
            if part and part != '/']

        if path_tokens:
            netloc = path_tokens.pop(0)
            base_path = posixpath.join('', *path_tokens)

    if sys.platform == "win32":
        base_path = convert_to_posix_path(base_path)

    return format(urllib.parse.ParseResult(scheme=scheme,
                                           netloc=netloc,
                                           path=base_path,
                                           params=params,
                                           query=query,
                                           fragment=None))
def join(base: str, *components: str, resolve_href: bool = False, **kwargs) -> str:
    """Convenience wrapper around ``urllib.parse.urljoin``, with a few differences:

    1. By default ``resolve_href=False``, which makes the function behave like
       ``os.path.join``: e.g. ``https://example.com/a/b`` + ``c/d`` gives
       ``https://example.com/a/b/c/d``.  With ``resolve_href=True`` the join is
       resolved the way a browser resolves an href:
       ``https://example.com/a/c/d``.
    2. ``s3://``, ``gs://`` and ``oci://`` URLs are joined like ``http://`` URLs.
    3. Multiple components may be given; ``components[1:]`` are treated as
       literal path components appended to ``components[0]`` with slashes.
    """
    if not resolve_href:
        # For os.path.join-like behavior the base path must end in a slash,
        # otherwise urljoin would discard the base URL's last path segment.
        split_base = urllib.parse.urlparse(base)
        if not split_base.path.endswith("/"):
            base = split_base._replace(path=split_base.path + "/").geturl()

    saved_netloc = urllib.parse.uses_netloc
    saved_relative = urllib.parse.uses_relative
    # NOTE: we temporarily modify urllib internals so s3/gs/oci schemes are
    # treated like http.  This is non-portable and may be forward-incompatible
    # with future cpython versions.
    urllib.parse.uses_netloc = saved_netloc + ["s3", "gs", "oci"]
    urllib.parse.uses_relative = saved_relative + ["s3", "gs", "oci"]
    try:
        return urllib.parse.urljoin(base, "/".join(components), **kwargs)
    finally:
        urllib.parse.uses_netloc = saved_netloc
        urllib.parse.uses_relative = saved_relative


git_re = (
Expand Down

0 comments on commit c095dfa

Please sign in to comment.