Skip to content

Commit

Permalink
feat: add special datatype for timestamp (#730)
Browse files Browse the repository at this point in the history
* fix import path

* add tests

* include new annotation type

* stash

* move timestamp to experimental special class

* convert to utc first

* add docstring, set GMT as default tz

* clean up + add unit tests

* import typing for backwards compatibility

* add logic to convert to utc within timestamp
  • Loading branch information
nankolena authored Dec 16, 2024
1 parent 4558bbf commit d4504d2
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docs/reference/experimental/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@
options:
members: ["download_results_by_tag"]
show_root_heading: true
::: kolena._experimental.special_data_type
options:
show_root_heading: true
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from classification.multiclass.constants import DATASET
from classification.multiclass.constants import ID_FIELDS

from kolena.annotation import ScoredClassificationLabel
from kolena.dataset import download_dataset
from kolena.dataset import upload_results
from kolena.workflow.annotation import ScoredClassificationLabel


MODELS = ["resnet50v2", "inceptionv3"]
Expand Down
85 changes: 85 additions & 0 deletions kolena/_experimental/special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Special data types supported on the Kolena platform.
""" # noqa: E501
from abc import ABCMeta
from datetime import datetime
from typing import Optional

from kolena._utils.datatypes import DataCategory
from kolena._utils.datatypes import DataType
from kolena._utils.datatypes import TypedDataObject
from kolena._utils.pydantic_v1.dataclasses import dataclass
from kolena._utils.validators import ValidatorConfig


class _SpecialDataType(DataType):
    """Enumeration of the type identifiers used by special data objects."""

    TIMESTAMP = "TIMESTAMP"

    @staticmethod
    def _data_category() -> DataCategory:
        """Return the data category that every member of this enumeration belongs to."""
        category = DataCategory.SPECIAL
        return category


@dataclass(frozen=True, config=ValidatorConfig)
class SpecialDataType(TypedDataObject[_SpecialDataType], metaclass=ABCMeta):
    """Abstract base class from which every special data type derives."""


@dataclass(frozen=True, config=ValidatorConfig)
class Timestamp(SpecialDataType):
    """
    !!! note "Experimental"
        This class is considered **experimental**

    Timestamp data type.

    A timestamp is provided either directly as `epoch_time`, or as a `value` string together with the `format`
    used to parse it. In the latter case `epoch_time` is computed from the string when the object is created.
    """

    epoch_time: Optional[float] = None
    """The epoch time of the timestamp. If `value` and `format` are specified, the `epoch_time` will be calculated."""

    value: Optional[str] = None
    """
    The timestamp in a string representation. If present, the corresponding `format` must be specified too.
    Note that GMT timezone is assumed unless the offset is specified in the string.
    """

    format: Optional[str] = None
    """
    The format of the `value` string following the
    [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).
    """

    @staticmethod
    def _data_type() -> _SpecialDataType:
        return _SpecialDataType.TIMESTAMP

    def __post_init__(self) -> None:
        """
        Derive `epoch_time` from `value` and `format` when a string timestamp is provided.

        Nothing happens when `value` is empty or `None`. Otherwise the string is parsed with
        `datetime.strptime` and the resulting POSIX timestamp overwrites `epoch_time`.

        :raises ValueError: if `value` is set without a `format`, or if `value` cannot be parsed with `format`.
        """
        if not self.value:
            return
        if not self.format:
            raise ValueError("format needs to be specified for string timestamp")

        # "%%" is an escaped literal percent sign, so "%%z" is the text "%z" rather than a UTC-offset directive.
        # Drop the escaped pairs before looking for a real "%z"; otherwise such a format would be parsed into a
        # naive datetime and converted using the machine's local timezone instead of GMT.
        has_utc_offset = "%z" in self.format.replace("%%", "")
        if has_utc_offset:
            time_value = self.value
            time_format = self.format
        else:
            # No offset in the string: pin the parsed time to GMT by appending an explicit zero offset.
            time_value = self.value + " +0000"
            time_format = self.format + " %z"

        # The dataclass is frozen, so the computed field has to be written through object.__setattr__.
        object.__setattr__(
            self,
            "epoch_time",
            datetime.strptime(time_value, time_format).timestamp(),
        )
3 changes: 3 additions & 0 deletions kolena/_utils/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class DataCategory(str, Enum):
METRICS = "METRICS"
ASSET = "ASSET"
ANNOTATION = "ANNOTATION"
SPECIAL = "SPECIAL"

def data_category_to_module_name(self) -> str:
if self == DataCategory.TEST_SAMPLE:
Expand All @@ -97,6 +98,8 @@ def data_category_to_module_name(self) -> str:
return "kolena.asset"
if self == DataCategory.ANNOTATION:
return "kolena.annotation"
if self == DataCategory.SPECIAL:
return "kolena._experimental.data_type.special"
raise ValueError(f"Must specify module name for data category: {self}")


Expand Down
1 change: 1 addition & 0 deletions kolena/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class _AnnotationType(DataType):
TIME_SEGMENT = "TIME_SEGMENT"
TEXT_SEGMENT = "TEXT_SEGMENT"
CUSTOM = "CUSTOM"
TIMESTAMP = "TIMESTAMP"

@staticmethod
def _data_category() -> DataCategory:
Expand Down
11 changes: 8 additions & 3 deletions tests/integration/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
import pytest

from kolena._api.v2.dataset import CommitData
from kolena._experimental.special_data_type import Timestamp
from kolena.annotation import BoundingBox
from kolena.annotation import LabeledBoundingBox
from kolena.dataset import download_dataset
from kolena.dataset import list_datasets
from kolena.dataset import upload_dataset
from kolena.dataset.dataset import _fetch_dataset_history
from kolena.dataset.dataset import _load_dataset_metadata
from kolena.errors import InputValidationError
from kolena.errors import NotFoundError
from kolena.workflow.annotation import BoundingBox
from kolena.workflow.annotation import LabeledBoundingBox
from tests.integration.helper import assert_frame_equal
from tests.integration.helper import fake_locator
from tests.integration.helper import upload_extracted_properties
Expand Down Expand Up @@ -83,6 +84,8 @@ def test__upload_dataset() -> None:
LabeledBoundingBox(label="cat", top_left=[i, i], bottom_right=[i + 10, i + 10]),
LabeledBoundingBox(label="dog", top_left=[i + 5, i + 5], bottom_right=[i + 20, i + 20]),
],
time_str=Timestamp(value=f"12/31/2024, 00:00:{'{:02d}'.format(i)}", format="%m/%d/%Y, %H:%M:%S"),
time_num=Timestamp(epoch_time=1735689600 + i),
)
for i in range(20)
]
Expand All @@ -96,10 +99,12 @@ def test__upload_dataset() -> None:
BoundingBox(label=bbox.label, top_left=bbox.top_left, bottom_right=bbox.bottom_right)
for bbox in dp["bboxes"]
],
time_str=dp["time_str"],
time_num=dp["time_num"],
)
for dp in datapoints
]
columns = ["locator", "width", "height", "city", "bboxes"]
columns = ["locator", "width", "height", "city", "bboxes", "time_str", "time_num"]

upload_dataset(name, pd.DataFrame(datapoints[:10], columns=columns), id_fields=["locator"])

Expand Down
83 changes: 83 additions & 0 deletions tests/unit/_experimental/test_special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict
from typing import Optional

import pytest

from kolena._experimental.special_data_type import _SpecialDataType
from kolena._experimental.special_data_type import Timestamp
from kolena._utils.datatypes import DATA_TYPE_FIELD


@pytest.mark.parametrize(
    "timestamp, json_data",
    [
        (
            Timestamp(epoch_time=1700000000),
            {
                "epoch_time": 1700000000,
                "value": None,
                "format": None,
            },
        ),
        (
            Timestamp(value="12/31/2024, 00:00:00", format="%m/%d/%Y, %H:%M:%S"),
            {
                "epoch_time": 1735603200,
                "value": "12/31/2024, 00:00:00",
                "format": "%m/%d/%Y, %H:%M:%S",
            },
        ),
    ],
)
def test__serde__timestamp(timestamp: Timestamp, json_data: Dict[str, Any]) -> None:
    """A Timestamp serializes to its fields plus the data type tag, and deserializes back to an equal object."""
    category = _SpecialDataType._data_category().value
    type_name = _SpecialDataType.TIMESTAMP.value
    expected = dict(json_data)
    expected[DATA_TYPE_FIELD] = f"{category}/{type_name}"

    serialized = timestamp._to_dict()

    assert serialized == expected
    assert Timestamp._from_dict(serialized) == timestamp


@pytest.mark.parametrize(
    "time_value, time_format, expected_epoch",
    [
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %H:%M:%S", 1735603200),
        ("25/05/99 02:35:5.523", "%d/%m/%y %H:%M:%S.%f", 927599705.523),
        ("2021/05/25", "%Y/%m/%d", 1621900800),
        ("2021-05-25 02:35:15", "%Y-%m-%d %H:%M:%S", 1621910115),
        ("Tuesday, December 31, 2024 5:00:00 AM", "%A, %B %d, %Y %H:%M:%S %p", 1735621200),
        ("Tuesday, December 31, 2024 00:00:00 AM GMT-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
        ("Tuesday, December 31, 2024 00:00:00 AM UTC-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
    ],
)
def test__timestamp_epoch_conversion(time_value: str, time_format: str, expected_epoch: float) -> None:
    """The epoch time computed from a string timestamp matches the expected value for each format."""
    assert Timestamp(value=time_value, format=time_format).epoch_time == expected_epoch


@pytest.mark.parametrize(
    "time_value, time_format",
    [
        # value without format
        ("12/31/2024, 00:00:00", None),
        # format inconsistent with value
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %s"),
    ],
)
def test__timestamp_validation(time_value: str, time_format: Optional[str]) -> None:
    """Constructing a Timestamp from a string with a missing or unusable format raises ValueError."""
    kwargs = {"value": time_value, "format": time_format}
    with pytest.raises(ValueError):
        Timestamp(**kwargs)

0 comments on commit d4504d2

Please sign in to comment.