-
Notifications
You must be signed in to change notification settings - Fork 390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG] Add a repo provider for CKAN datasets #1833
Changes from 1 commit
b3f9e7c
83768a1
1ba7d01
d083830
32a5341
4ad04ea
2945d83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
"figshare": "Figshare", | ||
"hydroshare": "Hydroshare", | ||
"dataverse": "Dataverse", | ||
"ckan": "CKAN", | ||
} | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -448,6 +448,73 @@ def get_build_slug(self): | |
return f"hydroshare-{self.record_id}" | ||
|
||
|
||
class CKANProvider(RepoProvider): | ||
"""Provide contents of a CKAN dataset | ||
Users must provide a spec consisting of the CKAN dataset URL. | ||
""" | ||
|
||
name = Unicode("CKAN") | ||
|
||
display_name = "CKAN dataset" | ||
|
||
url_regex = r"/dataset/[a-z0-9_\\-]*$" | ||
|
||
labels = { | ||
"text": "CKAN dataset URL (https://demo.ckan.org/dataset/sample-dataset-1)", | ||
"tag_text": "Git ref (branch, tag, or commit)", | ||
"ref_prop_disabled": True, | ||
"label_prop_disabled": True, | ||
} | ||
|
||
def __init__(self, *args, **kwargs): | ||
super().__init__(*args, **kwargs) | ||
self.repo = urllib.parse.unquote(self.spec) | ||
|
||
async def get_resolved_ref(self): | ||
parsed_repo = urlparse(self.repo) | ||
self.dataset_id = parsed_repo.path.rsplit("/", maxsplit=1)[1] | ||
|
||
client = AsyncHTTPClient() | ||
|
||
api = parsed_repo._replace( | ||
path=re.sub(self.url_regex, "/api/3/action/", parsed_repo.path) | ||
).geturl() | ||
|
||
package_show_url = f"{api}package_show?id={self.dataset_id}" | ||
|
||
try: | ||
r = await client.fetch(package_show_url, user_agent="BinderHub") | ||
except HTTPError: | ||
return None | ||
|
||
def parse_date(json_body): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that this function is used only once, let's just inline that here. Otherwise the function gets redefined each time the parent function is called. Also adds another layer of indirection that's otherwise not needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. I will fix this as soon as possible. |
||
json_response = json.loads(json_body) | ||
date = json_response["result"]["metadata_modified"] | ||
parsed_date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") | ||
epoch = parsed_date.replace(tzinfo=timezone(timedelta(0))).timestamp() | ||
# truncate the timestamp | ||
return str(int(epoch)) | ||
|
||
self.record_id = f"{self.dataset_id}.v{parse_date(r.body)}" | ||
|
||
return self.record_id | ||
|
||
async def get_resolved_spec(self): | ||
if not hasattr(self, "record_id"): | ||
await self.get_resolved_ref() | ||
return self.repo | ||
|
||
def get_repo_url(self): | ||
return self.repo | ||
|
||
async def get_resolved_ref_url(self): | ||
resolved_spec = await self.get_resolved_spec() | ||
return resolved_spec | ||
|
||
def get_build_slug(self): | ||
return f"ckan-{self.dataset_id}" | ||
|
||
|
||
class GitRepoProvider(RepoProvider): | ||
"""Bare bones git repo provider. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
from tornado.ioloop import IOLoop | ||
|
||
from binderhub.repoproviders import ( | ||
CKANProvider, | ||
DataverseProvider, | ||
FigshareProvider, | ||
GistRepoProvider, | ||
|
@@ -209,6 +210,34 @@ async def test_dataverse( | |
assert spec == resolved_spec | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug", | ||
[ | ||
[ | ||
"https://demo.ckan.org/dataset/sample-dataset-1", | ||
"https://demo.ckan.org/dataset/sample-dataset-1", | ||
"sample-dataset-1.v", | ||
"https://demo.ckan.org/dataset/sample-dataset-1", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add some additional tests here for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, added. |
||
"ckan-sample-dataset-1", | ||
], | ||
], | ||
) | ||
async def test_ckan(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug): | ||
provider = CKANProvider(spec=spec) | ||
|
||
ref = await provider.get_resolved_ref() | ||
assert resolved_ref in ref | ||
|
||
slug = provider.get_build_slug() | ||
assert slug == build_slug | ||
repo_url = provider.get_repo_url() | ||
assert repo_url == spec | ||
ref_url = await provider.get_resolved_ref_url() | ||
assert ref_url == resolved_ref_url | ||
spec = await provider.get_resolved_spec() | ||
assert spec == resolved_spec | ||
|
||
|
||
@pytest.mark.github_api | ||
@pytest.mark.parametrize( | ||
"repo,unresolved_ref,resolved_ref", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same comment about using regexes as in jupyterhub/repo2docker#1336 (comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. I have made changes according to your suggestion.