Skip to content

Commit

Permalink
feat/add dataservice api endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
khalsz committed Dec 11, 2024
1 parent d241b71 commit 2ec8d24
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 146 deletions.
75 changes: 72 additions & 3 deletions src/api/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import uuid
from enum import Enum
from pathlib import Path

import click
import dask
import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from psycopg2.extras import Json
from sqlalchemy import MetaData, create_engine, text
from sqlalchemy_utils.functions import (
create_database,
Expand All @@ -27,6 +27,25 @@
LAST_MAST_SHOT = 30471 # This is the last MAST shot before MAST-U


class Context(str, Enum):
    """Namespace strings for the vocabulary prefixes used in metadata contexts.

    Members are ``str`` subclasses, so they compare equal to their plain
    string value.
    """

    # W3C Data Catalog Vocabulary.
    DCAT = "http://www.w3.org/ns/dcat#"
    # Dublin Core terms.
    DCT = "http://purl.org/dc/terms/"
    # Friend of a Friend.
    FOAF = "http://xmlns.com/foaf/0.1/"
    # NOTE(review): unlike every other member this is not an absolute IRI
    # (no scheme, no trailing slash) -- confirm whether consumers expect
    # "https://schema.org/" here.
    SCHEMA = "schema.org"
    # W3C Data Quality Vocabulary.
    DQV = "http://www.w3.org/ns/dqv#"
    # SDMX measure vocabulary.
    SDMX = "http://purl.org/linked-data/sdmx/2009/measure#"


# Prefix -> namespace mapping shared by the metadata records.
# Insertion order is significant: create_serve_dataset takes the last three
# entries (dcat, foaf, dct) as the context for the data service record.
base_context = {
    "schema": Context.SCHEMA,
    "dqv": Context.DQV,
    "sdmx-measure": Context.SDMX,
    "dcat": Context.DCAT,
    "foaf": Context.FOAF,
    "dct": Context.DCT,
}


class URLType(Enum):
"""Enum type for different types of storage endpoint"""

Expand Down Expand Up @@ -187,10 +206,8 @@ def create_signals(self, data_path: Path):
df = signals_metadata
df = df[df.shot_id <= LAST_MAST_SHOT]
df = df.drop_duplicates(subset="uuid")

df["shape"] = df["shape"].map(lambda x: x.tolist())
df["dimensions"] = df["dimensions"].map(lambda x: x.tolist())

df["url"] = (
"s3://mast/level1/shots/"
+ df["shot_id"].map(str)
Expand Down Expand Up @@ -219,6 +236,55 @@ def create_sources(self, data_path: Path):
source_metadata = source_metadata[column_names]
source_metadata.to_sql("sources", self.uri, if_exists="append", index=False)

def create_serve_dataset(self):
    """Append the data service description row to the ``dataservice`` table.

    Builds a one-row DataFrame holding the served dataset endpoints, the
    themes, type, id, title, description and endpoint URL, attaches the
    publisher and the prefix context as ``Json`` values, and writes the row
    to ``self.uri`` with ``if_exists="append"``.
    """
    served_datasets = [
        "host/json/dataset/shots",
        "host/json/dataset/shots/aggregate",
        "host/json/dataset/shots/shot_id",
        "host/json/dataset/shots/shot_id/signal",
        "host/json/dataset/signals",
        "host/json/dataset/signals/uuid",
        "host/json/dataset/signals/uuid/shots",
        "host/json/dataset/scenario",
        "host/json/dataset/source",
        "host/json/dataset/source/aggregate",
        "host/json/dataset/source/name",
        "host/json/dataset/cpfsummary",
    ]
    themes = [
        "host/json/dataset/shots",
        "host/json/dataset/signal",
        "host/json/dataset/source",
        "host/json/dataset/scenario",
        "host/json/dataset/cpfsummary",
    ]
    # Adjacent literals are joined by the parser. A backslash continuation
    # inside the string would instead embed the following line's leading
    # indentation in the stored description.
    description = (
        "UKAEA Data Service providing access to the MAST-U dataset. "
        "This includes signal, source, shots and other datasets."
    )
    # Every value is a one-element list so the frame has exactly one row;
    # the list-valued columns are wrapped once more for the same reason.
    data = {
        "servesdataset": [served_datasets],
        "theme": [themes],
        "type": ["dcat:DataService"],
        "id": ["host/json/data-service"],
        "title": ["MASTU Data Service"],
        "description": [description],
        "endpointurl": ["host"],
    }
    publisher = {
        "dct__publisher": {
            "type_": "foaf:Organization",
            "foaf:name": "UKAEA",
            "foaf:homepage": "http://ukaea.uk",
        }
    }
    # Select the prefixes by name rather than by position in base_context,
    # so reordering or extending that mapping cannot change this record.
    context = {prefix: base_context[prefix] for prefix in ("dcat", "foaf", "dct")}

    df = pd.DataFrame(data, index=[0])
    df["publisher"] = Json(publisher)
    df["context"] = Json(context)
    df.to_sql("dataservice", self.uri, if_exists="append", index=False)


def read_cpf_metadata(cpf_file_name: Path) -> pd.DataFrame:
cpf_metadata = pd.read_parquet(cpf_file_name)
Expand Down Expand Up @@ -257,6 +323,9 @@ def create_db_and_tables(data_path):
logging.info("Create Signals")
client.create_signals(data_path)

logging.info("Create dataservice")
client.create_serve_dataset()

client.create_user()


Expand Down
8 changes: 7 additions & 1 deletion src/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def apply_filters(query: Query, filters: str) -> Query:
def apply_sorting(query: Query, sort: t.Optional[str] = None) -> Query:
if sort is None:
return query

if sort.startswith("-"):
sort = sort[1:]
order = desc(column(sort))
Expand Down Expand Up @@ -221,6 +221,12 @@ def get_shot(shot_id: int):
return query


def get_dataservices(db: Session):
    """Return the first column of the single ``DataService`` row.

    ``Result.one()`` is used, so the call raises if the query yields no
    row or more than one row.
    """
    statement = select(models.DataService)
    row = db.execute(statement).one()
    return row[0]


def get_signal_datasets(
sort: t.Optional[str] = "name",
fields: t.Optional[t.List[str]] = [],
Expand Down
Loading

0 comments on commit 2ec8d24

Please sign in to comment.