Skip to content

Commit

Permalink
chore: add back special datatype for timestamp (#733)
Browse files Browse the repository at this point in the history
* Revert "Revert "feat: add special datatype for timestamp (#730)" (#732)"

This reverts commit 5e3863c.

* support iso format by default, refactor tz localization
  • Loading branch information
nankolena authored Jan 7, 2025
1 parent 7ba0dff commit 3c8fb7e
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docs/reference/experimental/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@
options:
members: ["download_results_by_tag"]
show_root_heading: true
::: kolena._experimental.special_data_type
options:
show_root_heading: true
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from classification.multiclass.constants import DATASET
from classification.multiclass.constants import ID_FIELDS

from kolena.annotation import ScoredClassificationLabel
from kolena.dataset import download_dataset
from kolena.dataset import upload_results
from kolena.workflow.annotation import ScoredClassificationLabel


MODELS = ["resnet50v2", "inceptionv3"]
Expand Down
83 changes: 83 additions & 0 deletions kolena/_experimental/special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Special data types supported on the Kolena platform.
""" # noqa: E501
from abc import ABCMeta
from datetime import datetime
from typing import Optional

import pytz

from kolena._utils.datatypes import DataCategory
from kolena._utils.datatypes import DataType
from kolena._utils.datatypes import TypedDataObject
from kolena._utils.pydantic_v1.dataclasses import dataclass
from kolena._utils.validators import ValidatorConfig


class _SpecialDataType(DataType):
    """Identifiers of the special data types, all filed under the `SPECIAL` data category."""

    TIMESTAMP = "TIMESTAMP"

    @staticmethod
    def _data_category() -> DataCategory:
        """Return the data category shared by every special data type."""
        return DataCategory.SPECIAL


@dataclass(frozen=True, config=ValidatorConfig)
class SpecialDataType(TypedDataObject[_SpecialDataType], metaclass=ABCMeta):
    """Common abstract parent of every special data type; concrete types such as `Timestamp` derive from it."""


@dataclass(frozen=True, config=ValidatorConfig)
class Timestamp(SpecialDataType):
    """
    !!! note "Experimental"
        This class is considered **experimental**

    A point in time, given either directly as an epoch time or as a string that is parsed into one.
    """

    epoch_time: Optional[float] = None
    """
    The epoch time of the timestamp. Whenever `value` is provided, the `epoch_time` is calculated from it and replaces
    any `epoch_time` passed in.
    """

    value: Optional[str] = None
    """
    The timestamp as a string. Unless the string carries its own offset, it is interpreted as being in the GMT
    timezone.
    """

    format: Optional[str] = None
    """
    How to parse `value`, expressed with the
    [python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes). When
    omitted, `value` is parsed with
    [python's `fromisoformat()`](https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat).
    """

    @staticmethod
    def _data_type() -> _SpecialDataType:
        """Return the type identifier of this data type."""
        return _SpecialDataType.TIMESTAMP

    def __post_init__(self) -> None:
        """
        Derive `epoch_time` from `value` when a string representation was supplied.

        Raises:
            ValueError: if `value` cannot be parsed with `format` (or as an ISO string when no `format` is given).
        """
        if not self.value:
            # nothing to parse: keep whatever `epoch_time` the caller provided
            return

        parsed = (
            datetime.strptime(self.value, self.format) if self.format else datetime.fromisoformat(self.value)
        )
        if not parsed.tzinfo:
            # no timezone information in the string, so treat it as GMT
            parsed = pytz.utc.localize(parsed)

        # the dataclass is frozen, so regular attribute assignment is blocked
        object.__setattr__(self, "epoch_time", parsed.timestamp())
3 changes: 3 additions & 0 deletions kolena/_utils/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class DataCategory(str, Enum):
METRICS = "METRICS"
ASSET = "ASSET"
ANNOTATION = "ANNOTATION"
SPECIAL = "SPECIAL"

def data_category_to_module_name(self) -> str:
if self == DataCategory.TEST_SAMPLE:
Expand All @@ -97,6 +98,8 @@ def data_category_to_module_name(self) -> str:
return "kolena.asset"
if self == DataCategory.ANNOTATION:
return "kolena.annotation"
if self == DataCategory.SPECIAL:
return "kolena._experimental.data_type.special"
raise ValueError(f"Must specify module name for data category: {self}")


Expand Down
1 change: 1 addition & 0 deletions kolena/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class _AnnotationType(DataType):
TIME_SEGMENT = "TIME_SEGMENT"
TEXT_SEGMENT = "TEXT_SEGMENT"
CUSTOM = "CUSTOM"
TIMESTAMP = "TIMESTAMP"

@staticmethod
def _data_category() -> DataCategory:
Expand Down
11 changes: 8 additions & 3 deletions tests/integration/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
import pytest

from kolena._api.v2.dataset import CommitData
from kolena._experimental.special_data_type import Timestamp
from kolena.annotation import BoundingBox
from kolena.annotation import LabeledBoundingBox
from kolena.dataset import download_dataset
from kolena.dataset import list_datasets
from kolena.dataset import upload_dataset
from kolena.dataset.dataset import _fetch_dataset_history
from kolena.dataset.dataset import _load_dataset_metadata
from kolena.errors import InputValidationError
from kolena.errors import NotFoundError
from kolena.workflow.annotation import BoundingBox
from kolena.workflow.annotation import LabeledBoundingBox
from tests.integration.helper import assert_frame_equal
from tests.integration.helper import fake_locator
from tests.integration.helper import upload_extracted_properties
Expand Down Expand Up @@ -83,6 +84,8 @@ def test__upload_dataset() -> None:
LabeledBoundingBox(label="cat", top_left=[i, i], bottom_right=[i + 10, i + 10]),
LabeledBoundingBox(label="dog", top_left=[i + 5, i + 5], bottom_right=[i + 20, i + 20]),
],
time_str=Timestamp(value=f"12/31/2024, 00:00:{'{:02d}'.format(i)}", format="%m/%d/%Y, %H:%M:%S"),
time_num=Timestamp(epoch_time=1735689600 + i),
)
for i in range(20)
]
Expand All @@ -96,10 +99,12 @@ def test__upload_dataset() -> None:
BoundingBox(label=bbox.label, top_left=bbox.top_left, bottom_right=bbox.bottom_right)
for bbox in dp["bboxes"]
],
time_str=dp["time_str"],
time_num=dp["time_num"],
)
for dp in datapoints
]
columns = ["locator", "width", "height", "city", "bboxes"]
columns = ["locator", "width", "height", "city", "bboxes", "time_str", "time_num"]

upload_dataset(name, pd.DataFrame(datapoints[:10], columns=columns), id_fields=["locator"])

Expand Down
99 changes: 99 additions & 0 deletions tests/unit/_experimental/test_special_data_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright 2021-2024 Kolena Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict
from typing import Optional

import pytest

from kolena._experimental.special_data_type import _SpecialDataType
from kolena._experimental.special_data_type import Timestamp
from kolena._utils.datatypes import DATA_TYPE_FIELD


@pytest.mark.parametrize(
    "object, json_data",
    [
        # constructed from an epoch time only
        (
            Timestamp(epoch_time=1700000000),
            {"epoch_time": 1700000000, "value": None, "format": None},
        ),
        # constructed from a formatted string; the epoch time is derived from it
        (
            Timestamp(value="12/31/2024, 00:00:00", format="%m/%d/%Y, %H:%M:%S"),
            {
                "epoch_time": 1735603200,
                "value": "12/31/2024, 00:00:00",
                "format": "%m/%d/%Y, %H:%M:%S",
            },
        ),
    ],
)
def test__serde__timestamp(object: Timestamp, json_data: Dict[str, Any]) -> None:
    """Check that a `Timestamp` serializes to the expected dict and deserializes back to an equal object."""
    serialized = object._to_dict()

    expected = dict(json_data)
    expected[DATA_TYPE_FIELD] = f"{_SpecialDataType._data_category().value}/{_SpecialDataType.TIMESTAMP.value}"
    assert serialized == expected

    restored = Timestamp._from_dict(serialized)
    assert restored == object


@pytest.mark.parametrize(
    "value, format, epoch_time",
    [
        # strings without an offset
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %H:%M:%S", 1735603200),
        ("25/05/99 02:35:5.523", "%d/%m/%y %H:%M:%S.%f", 927599705.523),
        ("2021/05/25", "%Y/%m/%d", 1621900800),
        ("2021-05-25 02:35:15", "%Y-%m-%d %H:%M:%S", 1621910115),
        ("Tuesday, December 31, 2024 5:00:00 AM", "%A, %B %d, %Y %H:%M:%S %p", 1735621200),
        # strings carrying an explicit -05:00 offset
        ("Tuesday, December 31, 2024 00:00:00 AM GMT-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
        ("Tuesday, December 31, 2024 00:00:00 AM UTC-05:00", "%A, %B %d, %Y %H:%M:%S %p %Z%z", 1735621200),
    ],
)
def test__timestamp_epoch_conversion_with_format(value: str, format: str, epoch_time: float) -> None:
    """Check the epoch time computed from a string parsed with an explicit format."""
    computed = Timestamp(value=value, format=format).epoch_time
    assert epoch_time == computed


@pytest.mark.parametrize(
    "value, epoch_time",
    [
        # no offset in the string
        ("2024-12-31", 1735603200),
        ("2024-12-31 00:00:00", 1735603200),
        # zero offset, written both ways
        ("2024-12-31 12:00:00+00:00", 1735646400),
        ("2024-12-31 12:00:00-00:00", 1735646400),
        # non-zero offsets
        ("2024-12-31 12:00:00+05:00", 1735628400),
        ("2024-12-31 12:00:00-05:00", 1735664400),
    ],
)
def test__timestamp_epoch_conversion_iso(value: str, epoch_time: float) -> None:
    """Check the epoch time computed from a string parsed without a format, i.e. as an ISO string."""
    computed = Timestamp(value=value).epoch_time
    assert epoch_time == computed


@pytest.mark.parametrize(
    "value, format",
    [
        # no format given and the value is not an ISO 8601 string
        ("12/31/2024, 00:00:00", None),
        # format that cannot parse the value (`%s` is not a supported strptime directive)
        ("12/31/2024, 00:00:00", "%m/%d/%Y, %s"),
    ],
)
def test__timestamp_validation(value: str, format: Optional[str]) -> None:
    """Check that constructing a `Timestamp` from an unparseable string raises `ValueError`."""
    with pytest.raises(ValueError):
        Timestamp(value=value, format=format)

0 comments on commit 3c8fb7e

Please sign in to comment.