Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add union structure family #668

Draft
wants to merge 25 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f2ed91e
Sort enum members.
danielballan Feb 23, 2024
f6fb66a
Move structure_family to the end, matching migration result.
danielballan Feb 23, 2024
b87a35e
Add union structure.
danielballan Feb 23, 2024
3fd300e
Sort
danielballan Feb 23, 2024
632b2f9
Refactor get_adapter to accept optional data_source_id.
danielballan Feb 23, 2024
e8e91c4
Creating a union node works.
danielballan Feb 23, 2024
fb6fea9
Return correct union structure.
danielballan Feb 23, 2024
87fc992
Forgot to commit modules
danielballan Feb 24, 2024
69cb505
Validate data source consistency.
danielballan Feb 24, 2024
9c5b1a0
Test mixing tables and arrays.
danielballan Feb 24, 2024
35e450b
Writing a table into a union node works.
danielballan Feb 25, 2024
a46ddd8
GET with '?data_source=<name>' works.
danielballan Feb 25, 2024
bfe2804
Use name in filepath, instead of random hex.
danielballan Feb 25, 2024
e36d382
Expose list of all keys in structure.
danielballan Feb 25, 2024
c2fe7f5
Only set include_data_sources param if not default (false).
danielballan Feb 25, 2024
c96b97a
Refactor link-writing into separate module.
danielballan Feb 25, 2024
e5bff27
Writing and reading tables works
danielballan Feb 25, 2024
bd78016
Writing and reading arrays works.
danielballan Feb 26, 2024
a4c1530
Implement single-key access.
danielballan Feb 26, 2024
d79b782
Only specify include_data_sources if not default.
danielballan Feb 27, 2024
a407021
Rename contents -> parts.
danielballan Feb 27, 2024
53d37e0
Clarify precedence
danielballan Mar 3, 2024
b42e8de
Copyedit comment
danielballan Mar 3, 2024
ec34f47
Finish consolidating structure family check
danielballan Mar 3, 2024
034c878
Add comment
danielballan Mar 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 172 additions & 1 deletion tiled/_tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
from ..client import Context, from_context, record_history
from ..queries import Key
from ..server.app import build_app
from ..structures.core import Spec
from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import DataSource
from ..structures.sparse import COOStructure
from ..structures.table import TableStructure
from ..validation_registration import ValidationRegistry
from .utils import fail_with_status_code

Expand Down Expand Up @@ -451,3 +453,172 @@ async def test_container_export(tree):
a.write_array([1, 2, 3], key="b")
buffer = io.BytesIO()
client.export(buffer, format="application/json")


def test_union_one_table(tree):
    """Create a union node backed by a single, empty two-column table."""
    app = build_app(tree)
    with Context.from_app(app) as context:
        client = from_context(context)
        empty_frame = pandas.DataFrame({"A": [], "B": []})
        sources = [
            DataSource(
                structure_family=StructureFamily.table,
                structure=TableStructure.from_pandas(empty_frame),
                name="table",
            )
        ]
        client.create_union(sources, key="x")


def test_union_two_tables(tree):
    """Create a union of two tables with disjoint columns; write and read each part."""
    with Context.from_app(build_app(tree)) as context:
        client = from_context(context)
        # Data source name -> the (empty) frame that defines and fills it.
        frames = {
            "table1": pandas.DataFrame({"A": [], "B": []}),
            "table2": pandas.DataFrame({"C": [], "D": [], "E": []}),
        }
        sources = [
            DataSource(
                structure_family=StructureFamily.table,
                structure=TableStructure.from_pandas(frame),
                name=name,
            )
            for name, frame in frames.items()
        ]
        x = client.create_union(sources, key="x")
        # Write every part first, then read every part back.
        for name, frame in frames.items():
            x.parts[name].write(frame)
        for name in frames:
            x.parts[name].read()


def test_union_two_tables_colliding_names(tree):
    """Expect a 422 when two data sources in one union share the same name."""
    with Context.from_app(build_app(tree)) as context:
        client = from_context(context)
        left = pandas.DataFrame({"A": [], "B": []})
        right = pandas.DataFrame({"C": [], "D": [], "E": []})
        table_structures = (
            TableStructure.from_pandas(left),
            TableStructure.from_pandas(right),
        )
        with fail_with_status_code(422):
            # Both data sources are deliberately given the name "table1".
            sources = [
                DataSource(
                    structure_family=StructureFamily.table,
                    structure=table_structure,
                    name="table1",
                )
                for table_structure in table_structures
            ]
            client.create_union(sources, key="x")


def test_union_two_tables_colliding_keys(tree):
    """Expect a 422 when two tables in one union both declare column "A"."""
    with Context.from_app(build_app(tree)) as context:
        client = from_context(context)
        # Distinct data source names, but column "A" appears in both frames.
        frames = {
            "table1": pandas.DataFrame({"A": [], "B": []}),
            "table2": pandas.DataFrame({"A": [], "C": [], "D": []}),
        }
        named_structures = [
            (name, TableStructure.from_pandas(frame))
            for name, frame in frames.items()
        ]
        with fail_with_status_code(422):
            sources = [
                DataSource(
                    structure_family=StructureFamily.table,
                    structure=table_structure,
                    name=name,
                )
                for name, table_structure in named_structures
            ]
            client.create_union(sources, key="x")


def test_union_two_tables_two_arrays(tree):
    """Mix two tables and two arrays in one union; write and read every part.

    After writing, each part is read back by data source name, and then every
    table column and array is read by key directly from the union node.
    """
    with Context.from_app(build_app(tree)) as context:
        client = from_context(context)
        tables = {
            "table1": pandas.DataFrame({"A": [], "B": []}),
            "table2": pandas.DataFrame({"C": [], "D": [], "E": []}),
        }
        arrays = {
            "F": numpy.ones((5, 5), dtype=numpy.float64),
            "G": 2 * numpy.ones((5, 5), dtype=numpy.int8),
        }
        sources = [
            DataSource(
                structure_family=StructureFamily.table,
                structure=TableStructure.from_pandas(frame),
                name=name,
            )
            for name, frame in tables.items()
        ]
        sources.extend(
            DataSource(
                structure_family=StructureFamily.array,
                structure=ArrayStructure.from_array(block),
                name=name,
            )
            for name, block in arrays.items()
        )
        x = client.create_union(sources, key="x")

        # Write by data source.
        for name, frame in tables.items():
            x.parts[name].write(frame)
        for name, block in arrays.items():
            x.parts[name].write_block(block, (0, 0))

        # Read by data source.
        for name in (*tables, *arrays):
            x.parts[name].read()

        # Read by column.
        for column in ("A", "B", "C", "D", "E", "F", "G"):
            x[column].read()


def test_union_table_column_array_key_collision(tree):
    """Expect a 422 when an array data source is named like a table column."""
    with Context.from_app(build_app(tree)) as context:
        client = from_context(context)
        frame = pandas.DataFrame({"A": [], "B": []})
        empty_array = numpy.array([], dtype=numpy.float64)
        table_structure = TableStructure.from_pandas(frame)
        array_structure = ArrayStructure.from_array(empty_array)
        with fail_with_status_code(422):
            table_source = DataSource(
                structure_family=StructureFamily.table,
                structure=table_structure,
                name="table",
            )
            # The array is named "B", which is also a column of the table.
            array_source = DataSource(
                structure_family=StructureFamily.array,
                structure=array_structure,
                name="B",
            )
            client.create_union([table_source, array_source], key="x")
3 changes: 3 additions & 0 deletions tiled/adapters/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@ def read_partition(self, *args, **kwargs):

def structure(self):
return self._structure

def get(self, key):
    """Return whatever the wrapped dataframe adapter's ``get`` yields for ``key``."""
    inner_adapter = self.dataframe_adapter
    return inner_adapter.get(key)
5 changes: 5 additions & 0 deletions tiled/adapters/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ def __getitem__(self, key):
# Must compute to determine shape.
return ArrayAdapter.from_array(self.read([key])[key].values)

def get(self, key):
    """Return the column ``key`` wrapped in an ``ArrayAdapter``, or ``None``.

    ``None`` is returned when ``key`` is not among the columns listed in this
    adapter's structure; otherwise the column is read and its values wrapped.
    """
    known_columns = self.structure().columns
    if key in known_columns:
        column_values = self.read([key])[key].values
        return ArrayAdapter.from_array(column_values)
    return None

def items(self):
yield from (
(key, ArrayAdapter.from_array(self.read([key])[key].values))
Expand Down
Loading
Loading