Skip to content

Commit

Permalink
Merge pull request #765 from DHI/bug/Dataset.extract_track_modifies_i…
Browse files Browse the repository at this point in the history
…nput

Bug/Dataset.extract_track_modifies_input
  • Loading branch information
ecomodeller authored Dec 6, 2024
2 parents 6016a2e + 2e26f44 commit d75fecc
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 46 deletions.
105 changes: 75 additions & 30 deletions mikeio/_track.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .dataset import Dataset
from .dfs import Dfs0
from .eum import ItemInfo
from .eum import ItemInfo, EUMUnit, EUMType
from .spatial import GeometryFM2D


Expand All @@ -33,33 +33,17 @@ def _extract_track(

n_items = len(item_numbers)

if isinstance(track, str):
filename = track
path = Path(filename)
if not path.exists():
raise ValueError(f"{filename} does not exist")

ext = path.suffix.lower()
if ext == ".dfs0":
df = Dfs0(filename).to_dataframe()
elif ext == ".csv":
df = pd.read_csv(filename, index_col=0, parse_dates=True)
df.index = pd.DatetimeIndex(df.index)
else:
raise ValueError(f"{ext} files not supported (dfs0, csv)")

times = df.index
coords = df.iloc[:, 0:2].to_numpy(copy=True)

elif isinstance(track, Dataset):
times = track.time
coords = np.zeros(shape=(len(times), 2))
coords[:, 0] = track[0].to_numpy().copy()
coords[:, 1] = track[1].to_numpy().copy()
else:
assert isinstance(track, pd.DataFrame)
times = track.index
coords = track.iloc[:, 0:2].to_numpy(copy=True)
match track:
case str():
times, coords = _get_track_data_from_file(track)
case Dataset():
times, coords = _get_track_data_from_dataset(track)
case pd.DataFrame():
times, coords = _get_track_data_from_dataframe(track)
case _:
raise ValueError(
"track must be a file name, a Dataset or a pandas DataFrame"
)

assert isinstance(
times, pd.DatetimeIndex
Expand Down Expand Up @@ -157,11 +141,72 @@ def is_EOF(step: int) -> bool:
data_list[item + 2][t] = dati[item]

if geometry.is_geo:
items_out = [ItemInfo("Longitude"), ItemInfo("Latitude")]
items_out = [
ItemInfo("Longitude", EUMType.Latitude_longitude, EUMUnit.degree),
ItemInfo("Latitude", EUMType.Latitude_longitude, EUMUnit.degree),
]
else:
items_out = [ItemInfo("x"), ItemInfo("y")]
items_out = [
ItemInfo("x", EUMType.Geographical_coordinate, EUMUnit.meter),
ItemInfo("y", EUMType.Geographical_coordinate, EUMUnit.meter),
]

for item_info in items:
items_out.append(item_info)

return Dataset(data_list, times, items_out)


def _get_track_data_from_dataset(track: Dataset) -> tuple[pd.DatetimeIndex, np.ndarray]:
    """Read the time axis and the two coordinate items from a track Dataset.

    Parameters
    ----------
    track
        Dataset whose items 0 and 1 hold the two track coordinates.

    Returns
    -------
    tuple
        ``track.time`` and a newly allocated ``(len(time), 2)`` float array
        filled from items 0 and 1, so the input data is not shared.
    """
    times = track.time
    coords = np.zeros(shape=(len(times), 2))
    # Column 0 comes from item 0 and column 1 from item 1.
    for column in (0, 1):
        coords[:, column] = track[column].to_numpy().copy()
    return times, coords


def _get_track_data_from_dataframe(
    track: pd.DataFrame,
) -> tuple[pd.DatetimeIndex, np.ndarray]:
    """Read the index and the first two columns from a track DataFrame.

    Parameters
    ----------
    track
        DataFrame whose first two columns hold the track coordinates.

    Returns
    -------
    tuple
        The frame's index (returned as-is, not validated here) and a copy of
        the first two columns as a NumPy array.
    """
    first_two_columns = track.iloc[:, 0:2]
    # copy=True guarantees the caller can modify the array without touching
    # the input frame.
    return track.index, first_two_columns.to_numpy(copy=True)


def _get_track_data_from_file(track: str) -> tuple[pd.DatetimeIndex, np.ndarray]:
    """Load track times and coordinates from a dfs0 or csv file.

    Parameters
    ----------
    track
        Path to the track file. The extension (case-insensitive) selects the
        reader: ``.dfs0`` is read through ``Dfs0(...).to_dataframe()``,
        ``.csv`` through ``pandas.read_csv`` with the first column parsed as
        the datetime index.

    Returns
    -------
    tuple
        The index of the loaded table and a copy of its first two columns
        as a NumPy array.

    Raises
    ------
    FileNotFoundError
        If ``track`` does not exist.
    ValueError
        If the file extension is neither ``.dfs0`` nor ``.csv``.
    """
    path = Path(track)
    if not path.exists():
        raise FileNotFoundError(f"{track} does not exist")

    ext = path.suffix.lower()
    if ext == ".dfs0":
        df = Dfs0(track).to_dataframe()
    elif ext == ".csv":
        df = pd.read_csv(track, index_col=0, parse_dates=True)
        # Force a DatetimeIndex even if read_csv left the index as object.
        df.index = pd.DatetimeIndex(df.index)
    else:
        raise ValueError(f"{ext} files not supported (dfs0, csv)")

    return df.index, df.iloc[:, 0:2].to_numpy(copy=True)


def _find_end_index(t_rel: pd.Index, end_time: pd.Timestamp) -> int:
    """Return the largest position in ``t_rel`` whose value is <= 0.

    Parameters
    ----------
    t_rel
        Values compared against zero.
    end_time
        Not used by the lookup; kept as part of the signature.

    Raises
    ------
    ValueError
        If no value in ``t_rel`` is <= 0.
    """
    non_positive = np.nonzero(t_rel <= 0)[0]
    if non_positive.size == 0:
        raise ValueError("No time overlap!")
    return non_positive[-1]


def _find_start_index(t_rel: pd.Index, start_time: pd.Timestamp) -> int:
    """Return the smallest position in ``t_rel`` whose value is >= 0.

    Parameters
    ----------
    t_rel
        Values compared against zero.
    start_time
        Not used by the lookup; kept as part of the signature.

    Raises
    ------
    ValueError
        If no value in ``t_rel`` is >= 0.
    """
    non_negative = np.nonzero(t_rel >= 0)[0]
    if non_negative.size == 0:
        raise ValueError("No time overlap!")
    return non_negative[0]
2 changes: 1 addition & 1 deletion mikeio/dataset/_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ def extract_track(
geometry=self.geometry,
n_elements=self.shape[1], # TODO is there a better way to find out this?
track=track,
items=[self.item],
items=deepcopy([self.item]),
time_steps=list(range(self.n_timesteps)),
item_numbers=[0],
method=method,
Expand Down
2 changes: 1 addition & 1 deletion mikeio/dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ def extract_track(
geometry=self.geometry,
n_elements=self.shape[1], # TODO is there a better way to find out this?
track=track,
items=self.items,
items=deepcopy(self.items),
time_steps=time_steps,
item_numbers=item_numbers,
method=method,
Expand Down
24 changes: 11 additions & 13 deletions mikeio/dfsu/_dfsu.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import annotations
from copy import deepcopy
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -686,18 +687,15 @@ def extract_track(
Examples
--------
>>> dfsu = mikeio.open("tests/testdata/NorthSea_HD_and_windspeed.dfsu")
>>> ds = dfsu.extract_track("tests/testdata/altimetry_NorthSea_20171027.csv")
>>> ds
<mikeio.Dataset>
dims: (time:1115)
time: 2017-10-26 04:37:37 - 2017-10-30 20:54:47 (1115 non-equidistant records)
geometry: GeometryUndefined()
items:
0: Longitude <Undefined> (undefined)
1: Latitude <Undefined> (undefined)
2: Surface elevation <Surface Elevation> (meter)
3: Wind speed <Wind speed> (meter per sec)
```{python}
import mikeio
ds = (
mikeio.open("../data/NorthSea_HD_and_windspeed.dfsu")
.extract_track("../data/altimetry_NorthSea_20171027.csv")
)
ds
```
"""
dfs = DfsuFile.Open(self._filename)
Expand All @@ -714,7 +712,7 @@ def extract_track(
geometry=self.geometry,
n_elements=self.geometry.n_elements,
track=track,
items=items,
items=deepcopy(items),
time_steps=time_steps,
item_numbers=item_numbers,
method=method,
Expand Down
9 changes: 8 additions & 1 deletion tests/test_dfsu.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,16 @@ def test_extract_track_from_dataset():
parse_dates=True,
)
df.index = pd.DatetimeIndex(df.index)
assert ds[0].name == "Sign. Wave Height"
track = ds.extract_track(df)

assert track[2].values[23] == approx(3.6284972794399653)
# This should not change the original dataset
track.rename({"Sign. Wave Height": "Hm0"}, inplace=True)
assert track["Hm0"].name == "Hm0"

assert ds[0].name == "Sign. Wave Height"

assert track["Hm0"].values[23] == approx(3.6284972794399653)
assert sum(np.isnan(track[2].to_numpy())) == 26
assert np.all(track[1].to_numpy() == df.latitude.values)

Expand Down

0 comments on commit d75fecc

Please sign in to comment.