Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use default_columns property from hats catalog if available in read_hats #543

Merged
merged 3 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/lsdb/loaders/hats/read_hats.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ def read_hats(
Catalog object loaded from the given parameters
"""
# Creates a config object to store loading parameters from all keyword arguments.

hc_catalog = hc.read_hats(path)

if columns is None and hc_catalog.catalog_info.default_columns is not None:
columns = hc_catalog.catalog_info.default_columns

config = HatsLoadingConfig(
search_filter=search_filter,
columns=columns,
Expand All @@ -69,7 +75,6 @@ def read_hats(
kwargs=kwargs,
)

hc_catalog = hc.read_hats(path)
if hc_catalog.schema is None:
raise ValueError(
"The catalog schema could not be loaded from metadata."
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
SMALL_SKY_TO_XMATCH_SOFT_NAME = "small_sky_to_xmatch_soft"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"
SMALL_SKY_ORDER1_NO_PANDAS_DIR_NAME = "small_sky_order1_no_pandas_meta"
SMALL_SKY_ORDER1_DEFAULT_COLS_DIR_NAME = "small_sky_order1_default_columns"
SMALL_SKY_ORDER1_SOURCE_NAME = "small_sky_order1_source"
SMALL_SKY_ORDER1_SOURCE_MARGIN_NAME = "small_sky_order1_source_margin"
SMALL_SKY_TO_ORDER1_SOURCE_NAME = "small_sky_to_o1source"
Expand Down Expand Up @@ -76,6 +77,11 @@ def small_sky_order1_dir(test_data_dir):
return test_data_dir / SMALL_SKY_ORDER1_DIR_NAME


@pytest.fixture
def small_sky_order1_default_cols_dir(test_data_dir):
    """Directory of the ``small_sky_order1_default_columns`` test catalog."""
    catalog_dir = test_data_dir / SMALL_SKY_ORDER1_DEFAULT_COLS_DIR_NAME
    return catalog_dir


@pytest.fixture
def small_sky_order1_no_pandas_dir(test_data_dir):
    """Directory of the ``small_sky_order1_no_pandas_meta`` test catalog."""
    catalog_dir = test_data_dir / SMALL_SKY_ORDER1_NO_PANDAS_DIR_NAME
    return catalog_dir
Expand Down
39 changes: 39 additions & 0 deletions tests/data/generate_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,45 @@
"sso1.partition_info.write_to_file(hats.io.paths.get_partition_info_pointer(out_catalog_name))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### small_sky_order1_default_columns\n",
"\n",
"Copies small_sky_order1 but adds a list of default columns to the properties file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-23T18:41:22.261748Z",
"start_time": "2025-01-23T18:41:22.230037Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import hats\n",
"from hats.io.paths import DATASET_DIR\n",
"from hats.io.file_io import get_upath\n",
"\n",
"out_catalog_name = \"small_sky_order1_default_columns\"\n",
"\n",
"sso1 = hats.read_hats(\"small_sky_order1\")\n",
"sso1_dataset_path = get_upath(\"small_sky_order1\") / DATASET_DIR\n",
"out_dataset_path = get_upath(out_catalog_name) / DATASET_DIR\n",
"\n",
"os.mkdir(out_catalog_name)\n",
"os.symlink(f\"../{sso1_dataset_path}\", out_dataset_path)\n",
"sso1.catalog_info.copy_and_update(\n",
" catalog_name=out_catalog_name, default_columns=[\"ra\", \"dec\", \"id\"]\n",
").to_properties_file(out_catalog_name)\n",
"sso1.partition_info.write_to_file(hats.io.paths.get_partition_info_pointer(out_catalog_name))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
1 change: 1 addition & 0 deletions tests/data/small_sky_order1_default_columns/dataset
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Norder,Npix
1,44
1,45
1,46
1,47
15 changes: 15 additions & 0 deletions tests/data/small_sky_order1_default_columns/properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#HATS catalog
obs_collection=small_sky_order1_default_columns
dataproduct_type=object
hats_nrows=131
hats_col_ra=ra
hats_col_dec=dec
hats_cols_default=ra dec id
hats_max_rows=1000000
hats_order=1
moc_sky_fraction=0.08333
hats_builder=hats-import v0.3.6.dev26+g40366b4
hats_creation_date=2024-10-15T14\:47UTC
hats_estsize=39
hats_release_date=2024-09-18
hats_version=v0.1
25 changes: 25 additions & 0 deletions tests/lsdb/loaders/hats/test_read_hats.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,31 @@ def test_read_hats(small_sky_order1_dir, small_sky_order1_hats_catalog, assert_d
assert_index_correct(catalog)


def test_read_hats_default_cols(small_sky_order1_default_cols_dir, assert_divisions_are_correct):
    """Reading a catalog that declares default columns loads only those columns.

    No ``columns`` argument is passed, so ``read_hats`` is expected to fall back
    to the ``default_columns`` stored in the catalog's properties.
    """
    catalog = lsdb.read_hats(small_sky_order1_default_cols_dir)
    assert isinstance(catalog, lsdb.Catalog)
    assert isinstance(catalog._ddf, nd.NestedFrame)

    default_columns = catalog.hc_structure.catalog_info.default_columns
    assert default_columns is not None
    # Compare as plain lists so a length mismatch fails the assertion with a
    # readable diff instead of raising from an element-wise comparison.
    assert list(catalog.columns) == list(default_columns)

    # Materialize the catalog once and reuse the result for every check.
    computed = catalog.compute()
    assert list(computed.columns) == list(default_columns)
    assert isinstance(computed, npd.NestedFrame)

    assert_divisions_are_correct(catalog)
    assert_index_correct(catalog)


def test_read_hats_default_cols_specify_cols(small_sky_order1_default_cols_dir, assert_divisions_are_correct):
    """An explicit ``columns`` argument takes precedence over the catalog's default columns."""
    filter_columns = ["ra", "dec"]
    catalog = lsdb.read_hats(small_sky_order1_default_cols_dir, columns=filter_columns)
    assert isinstance(catalog, lsdb.Catalog)
    assert isinstance(catalog._ddf, nd.NestedFrame)

    # The catalog still advertises defaults; they are just not applied here.
    assert catalog.hc_structure.catalog_info.default_columns is not None
    # Compare as plain lists so a length mismatch fails the assertion with a
    # readable diff instead of raising from an element-wise comparison.
    assert list(catalog.columns) == filter_columns

    # Materialize the catalog once and reuse the result for every check.
    computed = catalog.compute()
    assert list(computed.columns) == filter_columns
    assert isinstance(computed, npd.NestedFrame)

    assert_divisions_are_correct(catalog)
    assert_index_correct(catalog)


def test_read_hats_no_pandas(small_sky_order1_no_pandas_dir, assert_divisions_are_correct):
catalog = lsdb.read_hats(small_sky_order1_no_pandas_dir)
assert isinstance(catalog, lsdb.Catalog)
Expand Down
Loading