Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support columns key in get_agasc_cone and improve caching #183

Merged
merged 10 commits into from
Jul 15, 2024
10 changes: 10 additions & 0 deletions .github/workflows/python-formatting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
name: check format using ruff
on: [push]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: chartboost/ruff-action@v1
with:
args: format --check
9 changes: 3 additions & 6 deletions .github/workflows/python-linting.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
name: Check Python formatting using Black and Ruff

name: lint code using ruff
on: [push]

jobs:
lint:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: psf/black@stable
- uses: actions/checkout@v4
- uses: chartboost/ruff-action@v1
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.5
rev: v0.5.1
hooks:
# Run the linter.
- id: ruff
Expand Down
117 changes: 101 additions & 16 deletions agasc/agasc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import tables
from astropy.table import Column, Table
from Chandra.Time import DateTime
from cxotime import CxoTimeLike, convert_time_format
from packaging.version import Version

from .healpix import get_healpix_index_table, get_stars_from_healpix_h5, is_healpix
Expand Down Expand Up @@ -42,6 +43,15 @@
MAG_CATID_SUPPLEMENT = 100
BAD_CLASS_SUPPLEMENT = 100

# Columns that are required for calls to get_agasc_cone. These are the minimal
# columns needed to compute proper-motion-corrected positions (RA/DEC at EPOCH
# plus PM_RA/PM_DEC) and to do the cone filtering.
COLUMNS_REQUIRED = {
    "RA",
    "DEC",
    "EPOCH",
    "PM_DEC",
    "PM_RA",
}

RA_DECS_CACHE = {}

COMMON_AGASC_FILE_DOC = """\
Expand Down Expand Up @@ -175,21 +185,22 @@ def read_h5_table(
row1: Optional[int] = None,
path="data",
cache=False,
columns: Optional[list | tuple] = None,
) -> np.ndarray:
"""
Read HDF5 table from group ``path`` in ``h5_file``.
Read HDF5 table ``columns`` from group ``path`` in ``h5_file``.

If ``row0`` and ``row1`` are specified then only the rows in that range are read,
e.g. ``data[row0:row1]``.

If ``cache`` is ``True`` then the data for the last read is cached in memory. The
cache key is ``(h5_file, path)`` and only one cache entry is kept. If ``h5_file``
is an HDF5 file object then the filename is used as the cache key.
cache key is ``(h5_file, path, columns)`` and up to 128 cache entries are
kept.

Parameters
----------
h5_file : str, Path, tables.file.File
Path to the HDF5 file to read or an open HDF5 file from ``tables.open_file``.
Path to the HDF5 file to read.
row0 : int, optional
First row to read. Default is None (read from first row).
row1 : int, optional
Expand All @@ -198,51 +209,66 @@ def read_h5_table(
Path to the data table in the HDF5 file. Default is 'data'.
cache : bool, optional
Whether to cache the read data. Default is False.
columns : list or tuple, optional
Column names to read from the file. If not specified, all columns are read.

Returns
-------
out : np.ndarray
The HDF5 data as a numpy structured array
"""
if columns is not None:
columns = tuple(columns)

if cache:
if isinstance(h5_file, tables.file.File):
h5_file = h5_file.filename
data = _read_h5_table_cached(h5_file, path)
data = _read_h5_table_cached(h5_file, path, columns)
out = data[row0:row1]
else:
out = _read_h5_table(h5_file, path, row0, row1)
out = _read_h5_table(h5_file, path, row0, row1, columns)

return out


@functools.lru_cache(maxsize=1)
@functools.lru_cache
def _read_h5_table_cached(
h5_file: str | Path,
path: str,
columns: tuple | None = None,
) -> np.ndarray:
return _read_h5_table(h5_file, path, row0=None, row1=None)
return _read_h5_table(h5_file, path, row0=None, row1=None, columns=columns)


def _read_h5_table(
h5_file: str | Path | tables.file.File,
path: str,
row0: None | int,
row1: None | int,
columns: tuple | None = None,
) -> np.ndarray:
if isinstance(h5_file, tables.file.File):
out = _read_h5_table_from_open_h5_file(h5_file, path, row0, row1)
out = _read_h5_table_from_open_h5_file(h5_file, path, row0, row1, columns)
else:
with tables.open_file(h5_file) as h5:
out = _read_h5_table_from_open_h5_file(h5, path, row0, row1)
out = _read_h5_table_from_open_h5_file(h5, path, row0, row1, columns)

out = np.asarray(out) # Convert to structured ndarray (not recarray)
return out


def _read_h5_table_from_open_h5_file(h5, path, row0, row1):
def _read_h5_table_from_open_h5_file(
h5: tables.file.File,
path: str,
row0: int,
row1: int,
columns: tuple | None = None,
):
data = getattr(h5.root, path)
out = data.read(start=row0, stop=row1)
return out
if columns:
out = np.rec.fromarrays([out[col] for col in columns], names=columns)
return np.asarray(out)


def get_agasc_filename(
Expand Down Expand Up @@ -383,7 +409,7 @@ def sphere_dist(ra1, dec1, ra2, dec2):
return np.degrees(dists)


def update_color1_column(stars):
def update_color1_column(stars: Table):
"""
For any stars which have a V-I color (RSV3 > 0) and COLOR1 == 1.5
then set COLOR1 = COLOR2 * 0.850. For such stars the MAG_ACA / MAG_ACA_ERR
Expand All @@ -396,8 +422,11 @@ def update_color1_column(stars):
https://heasarc.nasa.gov/W3Browse/all/tycho2.html for a reminder of the
scaling between the two.

This updates ``stars`` in place.
This updates ``stars`` in place if the COLOR1 column is present.
"""
if "COLOR1" not in stars.columns:
return

# Select red stars that have a reliable mag in AGASC 1.7 and later.
color15 = np.isclose(stars["COLOR1"], 1.5) & (stars["RSV3"] > 0)
new_color1 = stars["COLOR2"][color15] * 0.850
Expand Down Expand Up @@ -466,6 +495,8 @@ def get_agasc_cone(
pm_filter=True,
fix_color1=True,
use_supplement=None,
matlab_pm_bug=False,
columns=None,
cache=False,
):
"""
Expand Down Expand Up @@ -493,6 +524,9 @@ def get_agasc_cone(
:param fix_color1: set COLOR1=COLOR2 * 0.85 for stars with V-I color
:param use_supplement: Use estimated mag from AGASC supplement where available
(default=value of AGASC_SUPPLEMENT_ENABLED env var, or True if not defined)
:param matlab_pm_bug: Apply MATLAB proper motion bug prior to the MAY2118A loads
(default=False)
:param columns: Columns to return (default=all)
:param cache: Cache the AGASC data in memory (default=False)

:returns: astropy Table of AGASC entries
Expand All @@ -507,7 +541,27 @@ def get_agasc_cone(
# Possibly expand initial radius to allow for slop due proper motion
rad_pm = radius + (0.1 if pm_filter else 0.0)

stars = get_stars_func(ra, dec, rad_pm, agasc_file, cache)
# Ensure that the columns we need are read from the AGASC file, excluding PMCORR
# columns if supplied since they are not in the HDF5.
columns_query = (
None
if columns is None
else tuple(
column for column in columns if column not in ("RA_PMCORR", "DEC_PMCORR")
)
)

# Minimal columns to compute PM-corrected positions and do filtering.
if columns and COLUMNS_REQUIRED - set(columns):
raise ValueError(f"columns must include all of {COLUMNS_REQUIRED}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this seems fine and is solid explicit coding style, I'm just not sure if it would be a lighter touch to just add any needed columns to columns.


stars = get_stars_func(
ra, dec, rad_pm, agasc_file=agasc_file, columns=columns_query, cache=cache
)

if matlab_pm_bug:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this bug, I don't know if it should be applied before or after the filtering. But hopefully it didn't matter in your initial testing in the original kadi implementation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice the order doesn't matter since we have enough margin on the cone edge.

apply_matlab_pm_bug(stars, date)

add_pmcorr_columns(stars, date)
if fix_color1:
update_color1_column(stars)
Expand All @@ -524,11 +578,38 @@ def get_agasc_cone(
return stars


def apply_matlab_pm_bug(stars: Table, date: CxoTimeLike):
    """Reproduce the MATLAB proper motion bug for dates before its fix.

    MATLAB did not divide the RA proper motion by cos(dec). Pre-multiplying
    PM_RA by cos(dec) here means a subsequent ``add_pmcorr_columns()`` call
    reproduces MATLAB's (buggy) corrected positions. The main consumer is
    kadi.commands.observations.set_star_ids(), which needs to match flight
    catalogs that were generated with MATLAB.

    The fix went into MATLAB starting with the MAY2118A loads
    (MATLAB Tools 2018115); for dates at or after that time this function
    is a no-op.

    Parameters
    ----------
    stars : astropy.table.Table
        Table of stars from the AGASC; modified in place.
    date : CxoTimeLike
        Date for proper motion correction
    """
    # On/after the MAY2118A load start time the MATLAB bug is fixed: nothing to do.
    if convert_time_format(date, "date") >= "2018:141:03:35:03.000":
        return

    # -9999 flags "no proper motion available"; leave those entries untouched.
    has_pm = stars["PM_RA"] != -9999
    cos_dec = np.cos(np.deg2rad(stars["DEC"][has_pm]))
    # PM_RA is an int16 column, so rounding introduces a small error, but that
    # is fine for the purpose of star identification.
    stars["PM_RA"][has_pm] = np.round(stars["PM_RA"][has_pm] * cos_dec)


def get_stars_from_dec_sorted_h5(
ra: float,
dec: float,
radius: float,
agasc_file: str | Path,
columns: Optional[list[str] | tuple[str]] = None,
cache: bool = False,
) -> Table:
"""
Expand All @@ -544,6 +625,8 @@ def get_stars_from_dec_sorted_h5(
The radius of the search circle, in degrees.
agasc_file : str or Path
The path to the AGASC HDF5 file.
columns : list or tuple, optional
The columns to read from the AGASC file. If not specified, all columns are read.
cache : bool, optional
Whether to cache the AGASC data in memory. Default is False.

Expand All @@ -558,7 +641,9 @@ def get_stars_from_dec_sorted_h5(
dists = sphere_dist(ra, dec, ra_decs.ra[idx0:idx1], ra_decs.dec[idx0:idx1])
ok = dists <= radius

stars = read_h5_table(agasc_file, row0=idx0, row1=idx1, cache=cache)
stars = read_h5_table(
agasc_file, row0=idx0, row1=idx1, cache=cache, columns=columns
)
stars = Table(stars[ok])

return stars
Expand Down
7 changes: 6 additions & 1 deletion agasc/healpix.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def get_stars_from_healpix_h5(
dec: float,
radius: float,
agasc_file: str | Path,
columns: list[str] | tuple[str] | None = None,
cache: bool = False,
) -> Table:
"""
Expand All @@ -97,6 +98,8 @@ def get_stars_from_healpix_h5(
Radius of the search cone, in degrees.
agasc_file : str or Path
Path to the HDF5 file containing the AGASC data with a HEALPix index.
columns : list or tuple, optional
The columns to read from the AGASC file. If not specified, all columns are read.
cache : bool, optional
Whether to cache the AGASC data in memory. Default is False.

Expand All @@ -121,7 +124,9 @@ def get_stars_from_healpix_h5(
def make_stars_list(h5_file):
for healpix_index in healpix_indices:
idx0, idx1 = healpix_index_map[healpix_index]
stars = read_h5_table(h5_file, row0=idx0, row1=idx1, cache=cache)
stars = read_h5_table(
h5_file, row0=idx0, row1=idx1, cache=cache, columns=columns
)
stars_list.append(stars)

if cache:
Expand Down
1 change: 0 additions & 1 deletion agasc/scripts/mag_estimate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def get_parser():


def main():

args = get_parser().parse_args()

args.output_dir = Path(os.path.expandvars(args.output_dir))
Expand Down
2 changes: 1 addition & 1 deletion agasc/scripts/update_mag_supplement.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


"""

import argparse
import logging
import os
Expand Down Expand Up @@ -245,7 +246,6 @@ def get_next_file_name(file_name):


def main():

args = get_args()
args_log_file = args.pop("args_log_file")

Expand Down
2 changes: 1 addition & 1 deletion agasc/supplement/magnitudes/mag_estimate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
import scipy.special
import scipy.stats
import Ska.quatutil
from astropy.table import Table, vstack
from Chandra.Time import DateTime
from chandra_aca.transform import count_rate_to_mag, pixels_to_yagzag
Expand All @@ -21,7 +22,6 @@
from mica.archive.aca_dark.dark_cal import get_dark_cal_image
from Quaternion import Quat

import Ska.quatutil
from agasc import get_star

from . import star_obs_catalogs
Expand Down
10 changes: 5 additions & 5 deletions agasc/supplement/magnitudes/mag_estimate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@ def multi_star_html(
agasc_stats["t_mean_dr3"] - agasc_stats["mag_aca"]
) / agasc_stats["mag_aca_err"]
agasc_stats["new"] = True
agasc_stats["new"][
np.in1d(agasc_stats["agasc_id"], updated_star_ids)
] = False
agasc_stats["new"][np.in1d(agasc_stats["agasc_id"], updated_star_ids)] = (
False
)
agasc_stats["update_mag_aca"] = np.nan
agasc_stats["update_mag_aca_err"] = np.nan
agasc_stats["last_obs"] = CxoTime(agasc_stats["last_obs_time"]).date
Expand Down Expand Up @@ -457,7 +457,7 @@ def plot_agasc_id_single(
timeline["std"] = np.nan
timeline["mag_mean"] = np.nan
timeline["mag_std"] = np.nan
for obsid in np.unique(timeline["obsid"]):
for obsid in np.unique(timeline["obsid"]): # noqa: PLR1704
sel = obs_stats["obsid"] == obsid
if draw_obs_mag_stats and np.any(sel):
timeline["mag_mean"][timeline["obsid"] == obsid] = obs_stats[sel][
Expand Down Expand Up @@ -825,7 +825,7 @@ def plot_flags(telemetry, ax=None, obsid=None):
all_ok = timeline["obs_ok"] & all_ok
flags = [("OBS not OK", ~timeline["obs_ok"])] + flags

for obsid in obsids:
for obsid in obsids: # noqa: PLR1704
limits[obsid] = (
timeline["x"][timeline["obsid"] == obsid].min(),
timeline["x"][timeline["obsid"] == obsid].max(),
Expand Down
1 change: 0 additions & 1 deletion agasc/supplement/magnitudes/star_obs_catalogs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import numpy as np
import tables
import agasc
from astropy import table
from astropy.table import Table, join
from chandra_aca.transform import yagzag_to_pixels
Expand Down
Loading