Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow passing in pandas dataframes to x2sys_cross #591

Merged
merged 21 commits into from
Sep 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
1d9e72a
Implement tempfile_from_buffer for io.StringIO inputs
weiji14 Sep 2, 2020
e8cb8b6
Allow passing in kwargs to tempfile_from_buffer
weiji14 Sep 8, 2020
a64f47f
Mock X2SYS_HOME in a temporary X2SYS_TMP instead of current working dir
weiji14 Sep 8, 2020
9437850
Allow passing in pandas dataframes to x2sys_cross
weiji14 Sep 8, 2020
c32870c
Change working directory to inside X2SYS_TMP during test session
weiji14 Sep 8, 2020
ef98ab5
Merge branch 'master' into x2sys_cross_dataframes
weiji14 Sep 9, 2020
01c5ee5
Remove some info level verbose messages from x2sys_cross
weiji14 Sep 9, 2020
4132ff5
Revert "Allow passing in kwargs to tempfile_from_buffer"
weiji14 Sep 8, 2020
2c41ca4
Revert "Implement tempfile_from_buffer for io.StringIO inputs"
weiji14 Sep 2, 2020
aa3a521
Use tempfile_from_dftrack instead of tempfile_from_buffer
weiji14 Sep 9, 2020
fcd6cfe
Try closing the file stream before writing
weiji14 Sep 9, 2020
17a2f3e
Try different way of closing the tmpfile generated from dataframe
weiji14 Sep 9, 2020
1446429
Don't use GMTTempFile, just generate random filename and write to it
weiji14 Sep 9, 2020
a2b08be
Reduce git diff and make Windows tests pass by ignoring permission error
weiji14 Sep 9, 2020
8efb415
Revert "Mock X2SYS_HOME in a temporary X2SYS_TMP instead of current w…
weiji14 Sep 9, 2020
1b658c7
Revert "Remove some info level verbose messages from x2sys_cross"
weiji14 Sep 9, 2020
8430dd7
Better suffix finder for dataframe input into x2sys_cross
weiji14 Sep 9, 2020
b93a7aa
Test input two pandas dataframes to x2sys_cross with time column
weiji14 Sep 9, 2020
fb52df4
Merge remote-tracking branch 'upstream/master' into x2sys_cross_dataf…
weiji14 Sep 10, 2020
5338f37
Refactor to find suffix without using regex
weiji14 Sep 10, 2020
bb22363
Improve docstring of x2sys_cross and tempfile_from_dftrack
weiji14 Sep 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions pygmt/tests/test_x2sys_cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def fixture_tracks():
Load track data from the sample bathymetry file
"""
dataframe = load_sample_bathymetry()
return [dataframe.query(expr="bathymetry > -20")] # reduce size of dataset
dataframe.columns = ["x", "y", "z"] # longitude, latitude, bathymetry
return [dataframe.query(expr="z > -20")] # reduce size of dataset


def test_x2sys_cross_input_file_output_file(mock_x2sys_home):
Expand Down Expand Up @@ -76,25 +77,57 @@ def test_x2sys_cross_input_file_output_dataframe(mock_x2sys_home):
def test_x2sys_cross_input_dataframe_output_dataframe(mock_x2sys_home, tracks):
"""
Run x2sys_cross by passing in one dataframe, and output external crossovers
to a pandas.DataFrame. Not actually implemented yet, wait for
https://github.com/GenericMappingTools/gmt/issues/3717
to a pandas.DataFrame.
"""
with TemporaryDirectory(prefix="X2SYS", dir=os.getcwd()) as tmpdir:
tag = os.path.basename(tmpdir)
x2sys_init(tag=tag, fmtfile="xyz", force=True)

with pytest.raises(NotImplementedError):
_ = x2sys_cross(tracks=tracks, tag=tag, coe="i", verbose="i")
output = x2sys_cross(tracks=tracks, tag=tag, coe="i", verbose="i")

# assert isinstance(output, pd.DataFrame)
# assert output.shape == (4, 12)
# columns = list(output.columns)
# assert columns[:6] == ["x", "y", "t_1", "t_2", "dist_1", "dist_2"]
# assert columns[6:] == ["head_1","head_2","vel_1","vel_2","z_X","z_M"]
# assert output.dtypes["t_1"].type == np.datetime64
# assert output.dtypes["t_2"].type == np.datetime64
assert isinstance(output, pd.DataFrame)
assert output.shape == (14, 12)
columns = list(output.columns)
assert columns[:6] == ["x", "y", "i_1", "i_2", "dist_1", "dist_2"]
assert columns[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"]
assert output.dtypes["i_1"].type == np.object_
assert output.dtypes["i_2"].type == np.object_

return output

# return output

def test_x2sys_cross_input_two_dataframes(mock_x2sys_home):
    """
    Check that x2sys_cross accepts two pandas.DataFrame track tables which
    each carry a time column, and that the external crossovers come back as a
    pandas.DataFrame.
    """
    with TemporaryDirectory(prefix="X2SYS", dir=os.getcwd()) as tmpdir:
        tag = os.path.basename(tmpdir)
        x2sys_init(
            tag=tag, fmtfile="xyz", suffix="xyzt", units=["de", "se"], force=True
        )

        # Append a time row to the x2sys format definition file
        fmtpath = os.path.join(tmpdir, "xyz.fmt")
        with open(file=fmtpath, mode="a") as handle:
            handle.write("time\ta\tN\t0\t1\t0\t%g\n")

        # Build two track tables of 10 seeded-random (x, y, z) points each;
        # the seed doubles as the first digit of the start month, so the
        # tracks begin on 2020-01-01 and 2020-11-01 respectively
        tracks = []
        for seed in (0, 1):
            np.random.seed(seed=seed)
            table = pd.DataFrame(data=np.random.rand(10, 3), columns=("x", "y", "z"))
            table["time"] = pd.date_range(
                start=f"2020-{seed}1-01", periods=10, freq="ms"
            )
            tracks.append(table)

        crossovers = x2sys_cross(tracks=tracks, tag=tag, coe="e", verbose="i")

        assert isinstance(crossovers, pd.DataFrame)
        assert crossovers.shape == (30, 12)
        names = list(crossovers.columns)
        assert names[:6] == ["x", "y", "t_1", "t_2", "dist_1", "dist_2"]
        assert names[6:] == ["head_1", "head_2", "vel_1", "vel_2", "z_X", "z_M"]
        # Both time columns must have been parsed into datetimes
        assert crossovers.dtypes["t_1"].type == np.datetime64
        assert crossovers.dtypes["t_2"].type == np.datetime64


def test_x2sys_cross_input_two_filenames(mock_x2sys_home):
Expand Down Expand Up @@ -131,7 +164,7 @@ def test_x2sys_cross_invalid_tracks_input_type(tracks):
Run x2sys_cross using tracks input that is not a pandas.DataFrame (matrix)
or str (file) type, which would raise a GMTInvalidInput error.
"""
invalid_tracks = tracks[0].to_xarray().bathymetry
invalid_tracks = tracks[0].to_xarray().z
assert data_kind(invalid_tracks) == "grid"
with pytest.raises(GMTInvalidInput):
x2sys_cross(tracks=[invalid_tracks])
Expand Down
64 changes: 57 additions & 7 deletions pygmt/x2sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
GMT supplementary X2SYS module for crossover analysis.
"""
import contextlib
import os
from pathlib import Path

import pandas as pd

Expand All @@ -14,10 +16,45 @@
dummy_context,
fmt_docstring,
kwargs_to_strings,
unique_name,
use_alias,
)


@contextlib.contextmanager
def tempfile_from_dftrack(track, suffix):
    """
    Saves pandas.DataFrame track table to a temporary tab-separated ASCII text
    file with a unique name (to prevent clashes when running x2sys_cross),
    adding a suffix extension to the end.

    The file is written relative to the current working directory and is
    removed again when the context exits, whether or not an error occurred.

    Parameters
    ----------
    track : pandas.DataFrame
        A table holding track data with coordinate (x, y) or (lon, lat) values,
        and (optionally) time (t).
    suffix : str
        File extension, e.g. xyz, tsv, etc.

    Yields
    ------
    tmpfilename : str
        A temporary tab-separated value file with a unique name holding the
        track data. E.g. 'track-1a2b3c4.tsv'.
    """
    # A randomly named plain file (via unique_name) is used instead of
    # GMTTempFile/NamedTemporaryFile, because those ran into file permission
    # issues on macOS/Windows. The name is built before entering the try block
    # so that the cleanup below never refers to an unbound variable.
    tmpfilename = f"track-{unique_name()[:7]}.{suffix}"
    try:
        track.to_csv(
            path_or_buf=tmpfilename,
            sep="\t",
            index=False,
            date_format="%Y-%m-%dT%H:%M:%S.%fZ",
        )
        yield tmpfilename
    finally:
        # The file may not exist if writing failed before it was created;
        # don't let a missing file mask the original exception.
        with contextlib.suppress(FileNotFoundError):
            os.remove(tmpfilename)
Comment on lines +45 to +55
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original implementation using GMTTempFile/NamedTemporaryFile didn't work because of some permissions issues (on macOS/Windows), which is why this try-finally block is used.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code quality looks good. As you're the one who develops and uses these functions, we have to trust you. 😄

Just one suggestion: add a comment to the code explaining why you use unique_name here.

That was my first question when I read your code, before I saw your comment here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code quality looks good. As you're the one who develops and uses these functions, we have to trust you. 😄

It's all Paul's work done a decade ago, I'm just wrapping it in Python so more people can use it easily 😃 You won't believe how many 'crossover analysis' tools have been written again and again, but that's another story.

Just one suggestion: add a comment to the code explaining why you use unique_name here.

That was my first question when I read your code, before I saw your comment here.

Ok, will do.



@fmt_docstring
@use_alias(
D="fmtfile",
Expand Down Expand Up @@ -158,9 +195,10 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):

Parameters
----------
tracks : str or list
tracks : pandas.DataFrame or str or list
A table or a list of tables with (x, y) or (lon, lat) values in the
first two columns. Supported formats are ASCII, native binary, or
first two columns. Track(s) can be provided as pandas DataFrame tables
or file names. Supported file formats are ASCII, native binary, or
COARDS netCDF 1-D data. More columns may also be present.

If the filenames are missing their file extension, we will append the
Expand Down Expand Up @@ -263,8 +301,20 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):
if kind == "file":
file_contexts.append(dummy_context(track))
elif kind == "matrix":
raise NotImplementedError(f"{type(track)} inputs are not supported yet")
# file_contexts.append(lib.virtualfile_from_matrix(track.values))
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file
lastline = (
Path(os.environ["X2SYS_HOME"], kwargs["T"], f"{kwargs['T']}.tag")
.read_text()
.strip()
.split("\n")[-1]
) # e.g. "-Dxyz -Etsv -I1/1"
for item in sorted(lastline.split()): # sort list alphabetically
if item.startswith(("-E", "-D")): # prefer -Etsv over -Dxyz
suffix = item[2:] # e.g. tsv (1st choice) or xyz (2nd choice)

# Save pandas.DataFrame track data to temporary file
file_contexts.append(tempfile_from_dftrack(track=track, suffix=suffix))
else:
raise GMTInvalidInput(f"Unrecognized data type: {type(track)}")

Expand All @@ -287,8 +337,8 @@ def x2sys_cross(tracks=None, outfile=None, **kwargs):
parse_dates=[2, 3], # Datetimes on 3rd and 4th column
)
# Remove the "# " from "# x" in the first column
result = table.rename(columns={table.columns[0]: table.columns[0][2:]})
table = table.rename(columns={table.columns[0]: table.columns[0][2:]})
elif outfile != tmpfile.name: # if outfile is set, output in outfile only
result = None
table = None

return result
return table