Skip to content

Commit

Permalink
[#95] - add registration of existing metadata files stored on S3
Browse files Browse the repository at this point in the history
  • Loading branch information
sblack-usu committed Mar 7, 2024
1 parent 4496724 commit d91eca4
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 2 deletions.
1 change: 1 addition & 0 deletions api/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# import all adapters here to get them registered
from api.adapters import hydroshare
from api.adapters import s3
51 changes: 51 additions & 0 deletions api/adapters/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import boto3
import json
from botocore.client import Config
from botocore import UNSIGNED

from api.adapters.base import AbstractRepositoryMetadataAdapter, AbstractRepositoryRequestHandler
from api.adapters.utils import RepositoryType, register_adapter
from api.models.catalog import DatasetMetadataDOC
from api.models.user import Submission


class _S3RequestHandler(AbstractRepositoryRequestHandler):
    """Request handler that reads a JSON metadata file from an S3-compatible endpoint."""

    def get_metadata(self, record_id: str):
        """Fetches and parses the JSON metadata file identified by ``record_id``.

        :param record_id: identifier of the form ``<endpoint_url>+<bucket>+<file key>``
        :return: the parsed JSON content of the object
        :raises ValueError: if ``record_id`` does not contain all three "+"-delimited parts
        """
        # Split at most twice: the object key is the last component and may itself
        # contain "+" characters, which an unbounded split would silently truncate.
        parts = record_id.split("+", 2)
        if len(parts) != 3:
            raise ValueError(
                f"Invalid S3 record id '{record_id}', expected '<endpoint_url>+<bucket>+<file key>'"
            )
        endpoint_url, bucket_name, file_key = parts

        # Requests are sent unsigned (anonymous), so no credentials are attached
        s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), endpoint_url=endpoint_url)

        response = s3.get_object(Bucket=bucket_name, Key=file_key)
        json_content = response['Body'].read().decode('utf-8')

        # Parse the JSON content
        return json.loads(json_content)


class S3MetadataAdapter(AbstractRepositoryMetadataAdapter):
    """Repository adapter for dataset metadata files stored on S3."""

    # Handler instance that retrieves the metadata JSON from S3
    repo_api_handler = _S3RequestHandler()

    @staticmethod
    def to_catalog_record(metadata: dict) -> DatasetMetadataDOC:
        """Converts a metadata dict to a dataset catalog record

        The dict is passed as keyword arguments to DatasetMetadataDOC, so its keys
        must match that model's fields.
        """
        return DatasetMetadataDOC(**metadata)

    @staticmethod
    def to_repository_record(catalog_record: DatasetMetadataDOC):
        """Converts dataset catalog record to an S3 metadata record (not implemented)"""
        raise NotImplementedError("Converting a catalog record to an S3 record is not supported")

    @staticmethod
    def update_submission(submission: Submission, repo_record_id: str) -> Submission:
        """Sets the S3 specific metadata (record identifier and repository type) on the submission record"""

        submission.repository_identifier = repo_record_id
        submission.repository = RepositoryType.S3
        return submission


# Runs at import time: registers the S3 adapter class under RepositoryType.S3
register_adapter(RepositoryType.S3, S3MetadataAdapter)
1 change: 1 addition & 0 deletions api/adapters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

class RepositoryType(str, Enum):
    """String-valued enumeration of the repository types known to the adapters."""

    # Each member's value equals its name
    HYDROSHARE = "HYDROSHARE"
    S3 = "S3"


_adapter_registry = {}
Expand Down
18 changes: 18 additions & 0 deletions api/routes/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from api.authentication.user import get_current_user
from api.models.catalog import DatasetMetadataDOC
from api.models.user import Submission, User
from pydantic import BaseModel

router = APIRouter()

Expand Down Expand Up @@ -123,6 +124,23 @@ async def refresh_dataset_from_hydroshare(identifier: str, user: Annotated[User,
return dataset


class S3Path(BaseModel):
    """Request body locating a metadata file on S3."""

    # Path of the metadata file; used as the last part of the record identifier
    path: str
    # Bucket containing the file
    bucket: str
    # Endpoint of the S3 service; optional, falls back to the default below
    endpoint_url: str = "https://api.minio.cuahsi.io"


@router.put("/repository/s3", response_model=DatasetMetadataDOC)
async def register_s3_dataset(request_model: S3Path, user: Annotated[User, Depends(get_current_user)]):
    """Registers the metadata file located by ``request_model`` and returns the saved dataset.

    The endpoint url, bucket and path are joined with "+" into a single record
    identifier, which is used to look up the user's existing submission and is
    then handed to ``_save_to_db``.
    """
    repo_type = RepositoryType.S3
    identifier = f"{request_model.endpoint_url}+{request_model.bucket}+{request_model.path}"
    existing_submission: Submission = user.submission_by_repository(repo_type=repo_type, identifier=identifier)
    return await _save_to_db(
        repository_type=repo_type,
        identifier=identifier,
        user=user,
        submission=existing_submission,
    )


async def _save_to_db(repository_type: RepositoryType, identifier: str, user: User, submission: Submission = None):
adapter = get_adapter_by_type(repository_type=repository_type)
# fetch metadata from repository as catalog dataset
Expand Down
3 changes: 2 additions & 1 deletion docker/requirements/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ uvicorn[standard]
motor
beanie[httpx]==1.19.0
python-jose
pydantic<2.*
pydantic<2.*
boto3
3 changes: 2 additions & 1 deletion docker/requirements/triggers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ beanie==1.19.0
motor
pydantic[dotenv]<2.*
pydantic[email]<2.*
rocketry==2.5.1
rocketry==2.5.1
boto3

0 comments on commit d91eca4

Please sign in to comment.