Skip to content

Commit

Permalink
[python] Bindings for upgrade_domain (#3235)
Browse files Browse the repository at this point in the history
* [python/r] Bindings for `upgrade_domain`

* unit-test coverage

* update checking for none-slots for string index columns

* code-review feedback

Co-authored-by: nguyenv <vivian@tiledb.com>

---------

Co-authored-by: nguyenv <vivian@tiledb.com>
  • Loading branch information
johnkerl and nguyenv authored Oct 28, 2024
1 parent 3e20d3c commit 24a7438
Show file tree
Hide file tree
Showing 6 changed files with 441 additions and 34 deletions.
109 changes: 108 additions & 1 deletion apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@
Implementation of a SOMA DataFrame
"""
import inspect
from typing import Any, List, Optional, Sequence, Tuple, Type, Union, cast
from typing import (
Any,
Dict,
List,
Optional,
Sequence,
Tuple,
Type,
Union,
cast,
)

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -484,6 +494,103 @@ def tiledbsoma_upgrade_soma_joinid_shape(
)
return (True, "")

def _upgrade_or_change_domain_helper(
    self, newdomain: Domain, function_name_for_messages: str
) -> Any:
    """Converts the user-level tuple of low/high pairs into a pyarrow record
    batch suitable for calling libtiledbsoma.

    Args:
        newdomain: One slot per index column, in the order given by
            ``self._tiledb_dim_names()``. Each slot is a ``(low, high)``
            pair. For string/binary index columns a slot may also be
            ``None``, which is treated as ``("", "")``.
        function_name_for_messages: Name of the user-facing method; used
            as the prefix of error messages.

    Returns:
        A ``pyarrow.RecordBatch`` with one column per index column and two
        rows: the low values, then the high values.

    Raises:
        ValueError: If the number of slots differs from the number of
            index columns, if a non-``None`` slot does not have exactly
            two elements, or if ``None`` is given for an index column
            that is not of string/binary type.
    """
    # Arrow types for which the domain cannot be specified by the user.
    string_like_types = (
        pa.string(),
        pa.large_string(),
        pa.binary(),
        pa.large_binary(),
    )

    # Check user-provided domain against dataframe domain.
    dim_names = self._tiledb_dim_names()
    if len(dim_names) != len(newdomain):
        raise ValueError(
            f"{function_name_for_messages}: requested domain has length {len(newdomain)} but the dataframe's schema has index-column count {len(dim_names)}"
        )

    if any(slot is not None and len(slot) != 2 for slot in newdomain):
        raise ValueError(
            f"{function_name_for_messages}: requested domain must have low,high pairs, or `None`, in each slot"
        )

    # From the dataframe's schema, extract the subschema for only index
    # columns (TileDB dimensions).
    full_schema = self.schema
    dim_schema = pa.schema([full_schema.field(dim_name) for dim_name in dim_names])

    # Convert the user's tuple of low/high pairs into a dict keyed by
    # index-column name.
    new_domain_dict: Dict[str, Domain] = {}
    for dim_name, new_dom in zip(dim_names, newdomain):
        if new_dom is not None:
            new_domain_dict[dim_name] = tuple(new_dom)  # type: ignore
        elif dim_schema.field(dim_name).type in string_like_types:
            # Domain can't be specified for strings (core constraint), so
            # accept `None` as a convenient stand-in for ("", "").
            new_domain_dict[dim_name] = ("", "")  # type: ignore
        else:
            # Without this check, `tuple(None)` would raise an opaque
            # "'NoneType' object is not iterable" TypeError.
            raise ValueError(
                f"{function_name_for_messages}: requested domain has `None` for index column '{dim_name}', which is only supported for string or binary index columns"
            )

    # This has n columns where n is the number of index columns, and two
    # rows: one row for the low values and one for the high values.
    return pa.RecordBatch.from_pydict(new_domain_dict, schema=dim_schema)

def tiledbsoma_upgrade_domain(
    self, newdomain: Domain, check_only: bool = False
) -> StatusAndReason:
    """Sets the domain of a SOMA :class:`DataFrame` that does not have a
    domain set yet.

    ``newdomain`` must be a tuple of low/high pairs for the desired domain,
    one pair per index column. For string index columns, offer the pair as
    ``("", "")`` (``None`` is also accepted for such columns).

    If ``check_only`` is ``True``, returns whether the operation would
    succeed if attempted, and a reason why it would not. Otherwise the
    upgrade is attempted and ``(True, "")`` is returned.
    """
    function_name = "tiledbsoma_upgrade_domain"
    domain_table = self._upgrade_or_change_domain_helper(newdomain, function_name)
    clib_handle = self._handle._handle

    if not check_only:
        clib_handle.upgrade_domain(domain_table, function_name)
        return (True, "")

    status_and_reason = clib_handle.can_upgrade_domain(domain_table, function_name)
    return cast(StatusAndReason, status_and_reason)

def change_domain(
    self, newdomain: Domain, check_only: bool = False
) -> StatusAndReason:
    """Enlarges the domain of a SOMA :class:`DataFrame` that already has a
    domain.

    ``newdomain`` must be a tuple of low/high pairs for the desired domain,
    one pair per index column. For string index columns, offer the pair as
    ``("", "")`` (``None`` is also accepted for such columns).

    If ``check_only`` is ``True``, returns whether the operation would
    succeed if attempted, and a reason why it would not. Otherwise the
    change is attempted and ``(True, "")`` is returned.
    """
    function_name = "change_domain"
    domain_table = self._upgrade_or_change_domain_helper(newdomain, function_name)
    clib_handle = self._handle._handle

    if not check_only:
        clib_handle.change_domain(domain_table, function_name)
        return (True, "")

    status_and_reason = clib_handle.can_change_domain(domain_table, function_name)
    return cast(StatusAndReason, status_and_reason)

def __len__(self) -> int:
    """Returns the row count of the dataframe; ``len(df)`` is the same as ``df.count``."""
    n_rows = self.count
    return n_rows
Expand Down
50 changes: 50 additions & 0 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,28 @@ def can_upgrade_soma_joinid_shape(
"""Only implemented for DataFrame."""
raise NotImplementedError

def upgrade_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> None:
    """Base-class placeholder that always raises; only implemented for DataFrame."""
    raise NotImplementedError()

def can_upgrade_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> StatusAndReason:
    """Base-class placeholder that always raises; only implemented for DataFrame."""
    raise NotImplementedError()

def change_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> None:
    """Base-class placeholder that always raises; only implemented for DataFrame."""
    raise NotImplementedError()

def can_change_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> StatusAndReason:
    """Base-class placeholder that always raises; only implemented for DataFrame."""
    raise NotImplementedError()


class DataFrameWrapper(SOMAArrayWrapper[clib.SOMADataFrame]):
"""Wrapper around a Pybind11 SOMADataFrame handle."""
Expand Down Expand Up @@ -567,6 +589,34 @@ def can_upgrade_soma_joinid_shape(
),
)

def upgrade_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> None:
    """Wrapper-class internals: forwards both arguments unchanged to the
    wrapped handle's ``upgrade_domain``.

    NOTE(review): the pybind11 ``upgrade_domain`` binding calls
    ``_export_to_c`` on its first argument, so ``newdomain`` is presumably
    a pyarrow object here rather than a ``Domain`` tuple -- confirm the
    annotation.
    """
    wrapped = self._handle
    wrapped.upgrade_domain(newdomain, function_name_for_messages)

def can_upgrade_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> StatusAndReason:
    """Wrapper-class internals: forwards both arguments unchanged to the
    wrapped handle's ``can_upgrade_domain`` and returns its result.

    NOTE(review): the pybind11 ``can_upgrade_domain`` binding calls
    ``_export_to_c`` on its first argument, so ``newdomain`` is presumably
    a pyarrow object here rather than a ``Domain`` tuple -- confirm the
    annotation.
    """
    status_and_reason = self._handle.can_upgrade_domain(
        newdomain, function_name_for_messages
    )
    return cast(StatusAndReason, status_and_reason)

def change_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> None:
    """Wrapper-class internals: forwards both arguments unchanged to the
    wrapped handle's ``change_domain``.

    NOTE(review): the pybind11 ``change_domain`` binding calls
    ``_export_to_c`` on its first argument, so ``newdomain`` is presumably
    a pyarrow object here rather than a ``Domain`` tuple -- confirm the
    annotation.
    """
    wrapped = self._handle
    wrapped.change_domain(newdomain, function_name_for_messages)

def can_change_domain(
    self,
    newdomain: Domain,
    function_name_for_messages: str,
) -> StatusAndReason:
    """Wrapper-class internals: forwards both arguments unchanged to the
    wrapped handle's ``can_change_domain`` and returns its result.

    NOTE(review): the pybind11 ``can_change_domain`` binding calls
    ``_export_to_c`` on its first argument, so ``newdomain`` is presumably
    a pyarrow object here rather than a ``Domain`` tuple -- confirm the
    annotation.
    """
    status_and_reason = self._handle.can_change_domain(
        newdomain, function_name_for_messages
    )
    return cast(StatusAndReason, status_and_reason)


class PointCloudDataFrameWrapper(SOMAArrayWrapper[clib.SOMAPointCloudDataFrame]):
"""Wrapper around a Pybind11 SOMAPointCloudDataFrame handle."""
Expand Down
104 changes: 104 additions & 0 deletions apis/python/src/tiledbsoma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,110 @@ void load_soma_dataframe(py::module& m) {
}
},
"newshape"_a,
"function_name_for_messages"_a)

// Python binding "upgrade_domain": exports the given Python object through
// its `_export_to_c` method and forwards the resulting ArrowTable, together
// with the function name used in messages, to SOMADataFrame::upgrade_domain.
.def(
"upgrade_domain",
[](SOMADataFrame& sdf,
py::object pyarrow_domain_table,
std::string function_name_for_messages) {
// Stack-allocated Arrow structs; their addresses are handed to Python
// as integers so that `_export_to_c` can populate them.
ArrowArray pyarrow_domain_array;
ArrowSchema pyarrow_domain_schema;
uintptr_t nanoarrow_domain_array_ptr =
(uintptr_t)(&pyarrow_domain_array);
uintptr_t nanoarrow_domain_schema_ptr =
(uintptr_t)(&pyarrow_domain_schema);
pyarrow_domain_table.attr("_export_to_c")(
nanoarrow_domain_array_ptr, nanoarrow_domain_schema_ptr);
// Copy the populated structs into heap allocations given to ArrowTable.
ArrowTable nanoarrow_domain_table(
std::make_unique<ArrowArray>(pyarrow_domain_array),
std::make_unique<ArrowSchema>(pyarrow_domain_schema));
try {
sdf.upgrade_domain(
nanoarrow_domain_table, function_name_for_messages);
} catch (const std::exception& e) {
// Re-throw any std::exception as TileDBSOMAError, keeping its message.
throw TileDBSOMAError(e.what());
}
},
"pyarrow_domain_table"_a,
"function_name_for_messages"_a)

// Python binding "can_upgrade_domain": exports the given Python object
// through its `_export_to_c` method and returns the result of
// SOMADataFrame::can_upgrade_domain for the resulting ArrowTable.
.def(
"can_upgrade_domain",
[](SOMADataFrame& sdf,
py::object pyarrow_domain_table,
std::string function_name_for_messages) {
// Stack-allocated Arrow structs; their addresses are handed to Python
// as integers so that `_export_to_c` can populate them.
ArrowArray pyarrow_domain_array;
ArrowSchema pyarrow_domain_schema;
uintptr_t nanoarrow_domain_array_ptr =
(uintptr_t)(&pyarrow_domain_array);
uintptr_t nanoarrow_domain_schema_ptr =
(uintptr_t)(&pyarrow_domain_schema);
pyarrow_domain_table.attr("_export_to_c")(
nanoarrow_domain_array_ptr, nanoarrow_domain_schema_ptr);
// Copy the populated structs into heap allocations given to ArrowTable.
ArrowTable nanoarrow_domain_table(
std::make_unique<ArrowArray>(pyarrow_domain_array),
std::make_unique<ArrowSchema>(pyarrow_domain_schema));
try {
return sdf.can_upgrade_domain(
nanoarrow_domain_table, function_name_for_messages);
} catch (const std::exception& e) {
// Re-throw any std::exception as TileDBSOMAError, keeping its message.
throw TileDBSOMAError(e.what());
}
},
"pyarrow_domain_table"_a,
"function_name_for_messages"_a)

// Python binding "change_domain": exports the given Python object through
// its `_export_to_c` method and forwards the resulting ArrowTable, together
// with the function name used in messages, to SOMADataFrame::change_domain.
.def(
"change_domain",
[](SOMADataFrame& sdf,
py::object pyarrow_domain_table,
std::string function_name_for_messages) {
// Stack-allocated Arrow structs; their addresses are handed to Python
// as integers so that `_export_to_c` can populate them.
ArrowArray pyarrow_domain_array;
ArrowSchema pyarrow_domain_schema;
uintptr_t nanoarrow_domain_array_ptr =
(uintptr_t)(&pyarrow_domain_array);
uintptr_t nanoarrow_domain_schema_ptr =
(uintptr_t)(&pyarrow_domain_schema);
pyarrow_domain_table.attr("_export_to_c")(
nanoarrow_domain_array_ptr, nanoarrow_domain_schema_ptr);
// Copy the populated structs into heap allocations given to ArrowTable.
ArrowTable nanoarrow_domain_table(
std::make_unique<ArrowArray>(pyarrow_domain_array),
std::make_unique<ArrowSchema>(pyarrow_domain_schema));
try {
sdf.change_domain(
nanoarrow_domain_table, function_name_for_messages);
} catch (const std::exception& e) {
// Re-throw any std::exception as TileDBSOMAError, keeping its message.
throw TileDBSOMAError(e.what());
}
},
"pyarrow_domain_table"_a,
"function_name_for_messages"_a)

// Python binding "can_change_domain": exports the given Python object
// through its `_export_to_c` method and returns the result of
// SOMADataFrame::can_change_domain for the resulting ArrowTable.
.def(
"can_change_domain",
[](SOMADataFrame& sdf,
py::object pyarrow_domain_table,
std::string function_name_for_messages) {
// Stack-allocated Arrow structs; their addresses are handed to Python
// as integers so that `_export_to_c` can populate them.
ArrowArray pyarrow_domain_array;
ArrowSchema pyarrow_domain_schema;
uintptr_t nanoarrow_domain_array_ptr =
(uintptr_t)(&pyarrow_domain_array);
uintptr_t nanoarrow_domain_schema_ptr =
(uintptr_t)(&pyarrow_domain_schema);
pyarrow_domain_table.attr("_export_to_c")(
nanoarrow_domain_array_ptr, nanoarrow_domain_schema_ptr);
// Copy the populated structs into heap allocations given to ArrowTable.
ArrowTable nanoarrow_domain_table(
std::make_unique<ArrowArray>(pyarrow_domain_array),
std::make_unique<ArrowSchema>(pyarrow_domain_schema));
try {
return sdf.can_change_domain(
nanoarrow_domain_table, function_name_for_messages);
} catch (const std::exception& e) {
// Re-throw any std::exception as TileDBSOMAError, keeping its message.
throw TileDBSOMAError(e.what());
}
},
"pyarrow_domain_table"_a,
"function_name_for_messages"_a);
}

Expand Down
Loading

0 comments on commit 24a7438

Please sign in to comment.