Skip to content

Commit

Permalink
Cleanup URL parsing mechanisms
Browse files Browse the repository at this point in the history
  • Loading branch information
yuvipanda committed Jun 11, 2024
1 parent 4961341 commit 45fb7b9
Showing 1 changed file with 15 additions and 11 deletions.
26 changes: 15 additions & 11 deletions repo2docker/contentproviders/ckan.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,28 +43,32 @@ def detect(self, source, ref=None, extra_args=None):
if not parsed_url.netloc:
return None

url_parts_1 = parsed_url.path.split("/history/")
url_parts_2 = url_parts_1[0].split("/")
if url_parts_2[-2] == "dataset":
self.dataset_id = url_parts_2[-1]
else:
if "/dataset/" not in parsed_url.path:
# Not actually a dataset
return None

api_url_path = "/api/3/action/"
# CKAN may be under a URL prefix, and we should accomodate that
url_prefix, dataset_url = parsed_url.path.split("/dataset/")

dataset_url_parts = dataset_url.split("/")
self.dataset_id = dataset_url_parts[0]

api_url = parsed_url._replace(
path="/".join(url_parts_2[:-2]) + api_url_path, query=""
path=f"{url_prefix}/api/3/action/", query=""
).geturl()

status_show_url = f"{api_url}status_show"
resp = self.urlopen(status_show_url)
if resp.status_code == 200:

# handle the activites
# Activity ID may be present either as a query parameter, activity_id
# or as part of the URL, under `/history/<activity-id>`. If `/history/`
# is present, that takes precedence over `activity_id`
activity_id = None
if parse_qs(parsed_url.query).get("activity_id") is not None:
if "history" in dataset_url_parts:
activity_id = dataset_url_parts[dataset_url_parts.index("history") + 1]
elif parse_qs(parsed_url.query).get("activity_id") is not None:
activity_id = parse_qs(parsed_url.query).get("activity_id")[0]
if len(url_parts_1) == 2:
activity_id = url_parts_1[-1]

self.version = self._fetch_version(api_url)
return {
Expand Down

0 comments on commit 45fb7b9

Please sign in to comment.