Skip to content

Commit

Permalink
Fix datasets API with forward compat (#194)
Browse files Browse the repository at this point in the history
* Fix datasets API with forward compat

* Add property for base URL

* fix
  • Loading branch information
epwalsh authored Jan 3, 2023
1 parent 3869193 commit a9ae078
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 119 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ use patch releases for compatibility fixes instead.

## Unreleased

### Changed

- `Beaker.dataset.ls()` now returns a list instead of a generator.

### Fixed

- Fixed the `Beaker.dataset.ls()`, `.get_file()`, `.stream_file()`, `.fetch()`, and `.size()` methods.

## [v1.13.1](https://github.com/allenai/beaker-py/releases/tag/v1.13.1) - 2022-12-21

### Added
Expand Down
54 changes: 42 additions & 12 deletions beaker/data_model/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime
from typing import Optional, Tuple, Union
from urllib.parse import urlparse

from pydantic import validator

Expand All @@ -12,9 +13,10 @@
"DatasetSize",
"Dataset",
"DatasetStorageInfo",
"DatasetInfo",
"DatasetInfoPage",
"Digest",
"FileInfo",
"DatasetManifest",
"DatasetsPage",
"DatasetSpec",
"DatasetPatch",
Expand All @@ -24,18 +26,39 @@

class DatasetStorage(BaseModel):
id: str
address: str
token: str
token_expires: datetime
address: Optional[str] = None
url: Optional[str] = None
urlv2: Optional[str] = None

@validator("address")
def _validate_address(cls, v: Optional[str]) -> Optional[str]:
if v is not None and v.startswith("fh://"):
# HACK: fix prior to https://github.com/allenai/beaker/pull/2962
return v.replace("fh://", "https://", 1)
else:
return v

@property
def scheme(self) -> Optional[str]:
return "fh" if self.urlv2 is None else urlparse(self.urlv2).scheme

@property
def base_url(self) -> str:
if self.address is not None:
return self.address
elif self.urlv2 is not None:
return f"https://{urlparse(self.urlv2).netloc}"
else:
raise ValueError("Missing field 'urlv2' or 'address'")


class DatasetSize(BaseModel):
final: bool
files: int
bytes: int
bytes_human: str
final: Optional[bool] = None
bytes_human: Optional[str] = None


class Dataset(BaseModel):
Expand Down Expand Up @@ -129,9 +152,10 @@ def decode(self) -> bytes:

class FileInfo(BaseModel, arbitrary_types_allowed=True):
path: str
digest: Digest
updated: datetime

digest: Optional[Digest] = None

size: Optional[int] = None
"""
The size of the file, if known.
Expand All @@ -140,22 +164,28 @@ class FileInfo(BaseModel, arbitrary_types_allowed=True):
IGNORE_FIELDS = {"url"}

@validator("digest", pre=True)
def _validate_digest(cls, v: Union[str, Digest]) -> Digest:
def _validate_digest(cls, v: Union[str, Digest, None]) -> Optional[Digest]:
if isinstance(v, Digest):
return v
else:
elif isinstance(v, str):
return Digest(v)


class DatasetManifest(BaseModel):
files: Tuple[FileInfo, ...]
cursor: Optional[str] = None
else:
return None


class DatasetsPage(BasePage[Dataset]):
data: Tuple[Dataset, ...]


class DatasetInfoPage(BasePage[FileInfo]):
data: Tuple[FileInfo, ...]


class DatasetInfo(BaseModel):
page: DatasetInfoPage
size: DatasetSize


class DatasetSpec(BaseModel):
workspace: Optional[str] = None
description: Optional[str] = None
Expand Down
Loading

0 comments on commit a9ae078

Please sign in to comment.