import itertools
import numpy as np
import pytest
import xarray as xr
from xmip.postprocessing import EXACT_ATTRS
from xmip.preprocessing import (
broadcast_lonlat,
cmip6_renaming_dict,
combined_preprocessing,
correct_coordinates,
correct_lon,
correct_units,
fix_metadata,
maybe_convert_bounds_to_vertex,
maybe_convert_vertex_to_bounds,
parse_lon_lat_bounds,
promote_empty_dims,
rename_cmip6,
replace_x_y_nominal_lat_lon,
sort_vertex_order,
_interp_nominal_lon,
)
def create_test_ds(xname, yname, zname, xlen, ylen, zlen):
x = np.linspace(0, 359, xlen)
y = np.linspace(-90, 89, ylen)
z = np.linspace(0, 5000, zlen)
data = np.random.rand(len(x), len(y), len(z))
ds = xr.DataArray(data, coords=[(xname, x), (yname, y), (zname, z)]).to_dataset(
name="test"
)
ds.attrs["source_id"] = "test_id"
    # add broadcast 2D lon and lat to make sure there are no conflicts;
    # if x or y already use those names, add them as longitude/latitude instead
lon = ds[xname] * xr.ones_like(ds[yname])
lat = xr.ones_like(ds[xname]) * ds[yname]
if xname != "lon" and yname != "lat":
ds = ds.assign_coords(lon=lon, lat=lat)
else:
ds = ds.assign_coords(longitude=lon, latitude=lat)
return ds
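

# A minimal usage sketch of the helper above: build a small dataset with
# CMIP-style dimension names and check the broadcast 2D lon/lat coordinates
# it attaches. (Illustrative smoke test; the name is arbitrary.)
def test_create_test_ds_smoke():
    ds = create_test_ds("i", "j", "lev", xlen=4, ylen=3, zlen=2)
    assert ds["test"].shape == (4, 3, 2)
    # lon/lat are broadcast from the 1D dimension coordinates to 2D
    assert ds["lon"].dims == ("i", "j")
    assert ds["lat"].dims == ("i", "j")
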
@pytest.mark.parametrize("xname", ["i", "x"])
@pytest.mark.parametrize("yname", ["j", "y"])
@pytest.mark.parametrize("zname", ["lev", "olev", "olevel"])
@pytest.mark.parametrize("missing_dim", [None, "x", "y", "z"])
def test_rename_cmip6(xname, yname, zname, missing_dim):
xlen, ylen, zlen = (10, 5, 6)
ds = create_test_ds(xname, yname, zname, xlen, ylen, zlen)
if missing_dim == "x":
ds = ds.isel({xname: 0}).squeeze()
elif missing_dim == "y":
ds = ds.isel({yname: 0}).squeeze()
elif missing_dim == "z":
ds = ds.isel({zname: 0}).squeeze()
ds_renamed = rename_cmip6(ds, cmip6_renaming_dict())
assert set(ds_renamed.dims).issubset(set(["x", "y", "lev"]))
    if missing_dim != "x":
        assert xlen == len(ds_renamed.x)
    if missing_dim != "y":
        assert ylen == len(ds_renamed.y)
    if missing_dim != "z":
        assert zlen == len(ds_renamed.lev)
@pytest.mark.parametrize("xname", ["i", "x"])
@pytest.mark.parametrize("yname", ["j", "y"])
def test_rename_cmip6_worst_case(xname, yname):
xlen, ylen, zlen = (10, 5, 6)
ds = create_test_ds(xname, yname, "lev", xlen, ylen, zlen)
# now rename only some of the coordinates to the correct naming
ds = ds.assign_coords(
{"lon": ds.lon.reset_coords(drop=True).rename({xname: "x", yname: "y"})}
)
ds_renamed = rename_cmip6(ds, cmip6_renaming_dict())
assert set(ds_renamed.dims) == set(["x", "y", "lev"])
def test_broadcast_lonlat():
x = np.arange(-180, 179, 5)
y = np.arange(-90, 90, 6)
data = np.random.rand(len(x), len(y))
ds = xr.DataArray(data, dims=["x", "y"], coords={"x": x, "y": y}).to_dataset(
name="test"
)
expected = ds.copy()
expected.coords["lon"] = ds.x * xr.ones_like(ds.y)
expected.coords["lat"] = xr.ones_like(ds.x) * ds.y
ds_test = broadcast_lonlat(ds)
xr.testing.assert_identical(expected, ds_test)
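

# A hedged companion sketch (an assumption about idempotence, not covered
# above): if 2D lon/lat coordinates are already present, broadcast_lonlat is
# expected to leave the dataset unchanged.
def test_broadcast_lonlat_noop():
    xlen, ylen, zlen = (10, 5, 6)
    ds = create_test_ds("x", "y", "lev", xlen, ylen, zlen)  # already has 2D lon/lat
    xr.testing.assert_identical(ds, broadcast_lonlat(ds))
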
def test_promote_empty_dims():
xlen, ylen, zlen = (10, 5, 6)
ds = create_test_ds("x", "y", "z", xlen, ylen, zlen)
ds = ds.drop_vars(["x", "y", "z"])
ds_promoted = promote_empty_dims(ds)
assert set(["x", "y", "z"]).issubset(set(ds_promoted.coords))
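

# A complementary sketch: promote_empty_dims should also work on a plain
# dataset whose dimension never had a coordinate, not just on one whose
# coordinates were dropped.
def test_promote_empty_dims_plain_dataset():
    ds = xr.Dataset({"v": (("x",), np.zeros(3))})
    ds_promoted = promote_empty_dims(ds)
    assert "x" in ds_promoted.coords
    assert len(ds_promoted.x) == 3
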
@pytest.mark.parametrize("nans", [True, False])
@pytest.mark.parametrize("dask", [True, False])
def test_replace_x_y_nominal_lat_lon(dask, nans):
x = np.linspace(0, 720, 10)
y = np.linspace(-200, 140, 5)
lon = xr.DataArray(np.linspace(0, 360, len(x)), coords=[("x", x)])
lat = xr.DataArray(np.linspace(-90, 90, len(y)), coords=[("y", y)])
llon = lon * xr.ones_like(lat)
llat = xr.ones_like(lon) * lat
data = np.random.rand(len(x), len(y))
ds = xr.DataArray(data, coords=[("x", x), ("y", y)]).to_dataset(name="data")
ds.coords["lon"] = llon
ds.coords["lat"] = llat
    if nans:
        lon = ds["lon"].load().data
        lon[0, :] = np.nan
        lon[-1, :] = np.nan
        lon[:, 0] = np.nan
        lon[:, -1] = np.nan
        lon[4:8, 2:4] = np.nan  # interior block; must stay within the (10, 5) grid
        ds["lon"].data = lon
        # for lats put only some nans in the middle;
        # there is currently no way to interpolate lats at the edge
        lat = ds["lat"].load().data
        lat[4:8, 2:4] = np.nan  # interior block; must stay within the (10, 5) grid
        ds["lat"].data = lat
if dask:
ds = ds.chunk({"x": -1, "y": -1})
ds.coords["lon"] = ds.coords["lon"].chunk({"x": -1, "y": -1})
ds.coords["lat"] = ds.coords["lat"].chunk({"x": -1, "y": -1})
replaced_ds = replace_x_y_nominal_lat_lon(ds)
assert all(~np.isnan(replaced_ds.x))
assert all(~np.isnan(replaced_ds.y))
assert all(replaced_ds.x.diff("x") > 0)
assert all(replaced_ds.y.diff("y") > 0)
assert len(replaced_ds.lon.shape) == 2
assert len(replaced_ds.lat.shape) == 2
assert set(replaced_ds.lon.dims) == set(["x", "y"])
assert set(replaced_ds.lat.dims) == set(["x", "y"])
# test a dataset that would result in duplicates with current method
x = np.linspace(0, 720, 4)
y = np.linspace(-200, 140, 3)
llon = xr.DataArray(
np.array([[0, 50, 100, 150], [0, 50, 100, 150], [0, 50, 100, 150]]),
coords=[("y", y), ("x", x)],
)
llat = xr.DataArray(
np.array([[0, 0, 10, 0], [10, 0, 0, 0], [20, 20, 20, 20]]),
coords=[("y", y), ("x", x)],
)
data = np.random.rand(len(x), len(y))
ds = xr.DataArray(data, coords=[("x", x), ("y", y)]).to_dataset(name="data")
ds.coords["lon"] = llon
ds.coords["lat"] = llat
if dask:
ds = ds.chunk({"x": -1, "y": -1})
ds.coords["lon"] = ds.coords["lon"].chunk({"x": -1, "y": -1})
ds.coords["lat"] = ds.coords["lat"].chunk({"x": -1, "y": -1})
replaced_ds = replace_x_y_nominal_lat_lon(ds)
assert all(~np.isnan(replaced_ds.x))
assert all(~np.isnan(replaced_ds.y))
assert len(replaced_ds.y) == len(np.unique(replaced_ds.y))
assert len(replaced_ds.x) == len(np.unique(replaced_ds.x))
# make sure values are sorted in ascending order
assert all(replaced_ds.x.diff("x") > 0)
assert all(replaced_ds.y.diff("y") > 0)
assert len(replaced_ds.lon.shape) == 2
assert len(replaced_ds.lat.shape) == 2
assert set(replaced_ds.lon.dims) == set(["x", "y"])
assert set(replaced_ds.lat.dims) == set(["x", "y"])
def test_interp_nominal_lon():
"""
Check that https://github.com/jbusecke/xMIP/issues/295 was fixed in https://github.com/jbusecke/xMIP/pull/296
In https://github.com/jbusecke/xMIP/blob/0270f4b4977d512adc2337d4a547b39e25d2f2da/tests/test_preprocessing.py,
the old issue was replicated (and illustrated that the tests would have failed then).
"""
def _get_dummy_longitude() -> np.ndarray:
# Totally arbitrary data (although len(lon) has to be > 360 to see the issue)
lon = np.linspace(0, 360, 513)[:-1]
# Add some NaN values just as an example
lon[2 + 30 : len(lon) // 2 + 50] = np.nan
return lon
def _lons_parsed_make_sense(
input_lons: np.ndarray, lons_parsed: np.ndarray
) -> bool:
"""
Check if the parsed longitudes make sense.
Since we know that the input-lons are all monotonically increasing, the parsed lons should also do that.
"""
accepted_differences_between_lon_coords = np.unique(np.diff(input_lons))
if len(accepted_differences_between_lon_coords) not in [1, 2]:
raise RuntimeError(
f"Cannot work with changed format of inputdata {accepted_differences_between_lon_coords}"
)
diff_pars_lons = np.unique(np.diff(lons_parsed))
return np.all(
[x in accepted_differences_between_lon_coords for x in diff_pars_lons]
)
lons = _get_dummy_longitude()
lons_parsed = _interp_nominal_lon(lons)
assert _lons_parsed_make_sense(lons, lons_parsed)
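

# A compact companion sketch, assuming index-based linear interpolation
# (which the monotonicity check above relies on): a NaN gap in a nominal
# longitude vector is filled with the interpolated values.
def test_interp_nominal_lon_fills_gap():
    lon = np.array([0.0, 1.0, np.nan, 3.0, 4.0])
    filled = _interp_nominal_lon(lon)
    assert not np.any(np.isnan(filled))
    np.testing.assert_allclose(filled, np.arange(5.0))
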
@pytest.mark.parametrize(
"coord",
[
"x",
"y",
"lon",
"lat",
"lev",
"lev_bounds",
"lon_bounds",
"lat_bounds",
"time_bounds",
"lat_verticies",
"lon_verticies",
],
)
def test_correct_coordinates(coord):
xlen, ylen, zlen = (10, 5, 6)
ds = create_test_ds("xx", "yy", "zz", xlen, ylen, zlen)
# set a new variable
ds = ds.assign({coord: ds.test})
ds_corrected = correct_coordinates(ds)
assert coord in list(ds_corrected.coords)
@pytest.mark.parametrize(
"bad_coord",
[
v
for values in cmip6_renaming_dict().values()
for v in values
if v not in cmip6_renaming_dict().keys()
],
)
def test_renamed_coordinates(bad_coord):
xlen, ylen, zlen = (10, 5, 6)
ds = create_test_ds("xx", "yy", "zz", xlen, ylen, zlen)
# set a new variable which we want to rename
ds = ds.assign({bad_coord: ds.test})
ds_corrected = correct_coordinates(ds)
assert bad_coord not in list(ds_corrected.coords)
def test_parse_lon_lat_bounds():
lon = np.arange(0, 10)
lat = np.arange(20, 30)
data = np.random.rand(len(lon), len(lat))
ds = xr.DataArray(data, dims=["x", "y"], coords={"x": lon, "y": lat}).to_dataset(
name="test"
)
ds.coords["lon"] = ds.x * xr.ones_like(ds.y)
ds.coords["lat"] = xr.ones_like(ds.x) * ds.y
ds.coords["lon_bounds"] = (
xr.DataArray([-0.1, -0.1, 0.1, 0.1], dims=["vertex"]) + ds["lon"]
)
ds.coords["lat_bounds"] = (
xr.DataArray([-0.1, 0.1, 0.1, -0.1], dims=["vertex"]) + ds["lat"]
)
ds_test = parse_lon_lat_bounds(ds)
assert "lon_verticies" in ds_test.coords
assert "lat_verticies" in ds_test.coords
    # introduce a time dimension
    for wrong_coord in ["lon_bounds", "lat_bounds"]:
        # TODO: this should also test lev_bounds.
        # Are there other coords that should be purged of the time dimension?
ds_wrong = ds.copy()
ds_wrong.coords[wrong_coord] = ds_wrong.coords[wrong_coord] * xr.DataArray(
range(5), dims=["time"]
)
ds_test2 = parse_lon_lat_bounds(ds_wrong)
assert "time" in ds_wrong.dims
assert "time" not in ds_test2.variables
@pytest.mark.parametrize("missing_values", [False, 1e36, -1e36, 1001, -1001])
@pytest.mark.parametrize(
"shift",
[
-70,
-180,
-360,
],
)  # can't handle positive shifts yet
def test_correct_lon(missing_values, shift):
xlen, ylen, zlen = (40, 20, 6)
ds = create_test_ds("x", "y", "lev", xlen, ylen, zlen)
ds = ds.assign_coords(x=ds.x.data + shift)
lon = ds["lon"].reset_coords(drop=True)
ds = ds.assign_coords(lon=lon + shift)
if missing_values:
        # CESM-FV has some very large missing values; test that they are removed
lon = ds["lon"].load().data
lon[10:20, 10:20] = missing_values
ds["lon"].data = lon
ds_lon_corrected = correct_lon(ds)
assert ds_lon_corrected.lon.min() >= 0
assert ds_lon_corrected.lon.max() <= 360
def test_correct_units():
lev = np.arange(0, 200)
data = np.random.rand(*lev.shape)
ds = xr.DataArray(data, dims=["lev"], coords={"lev": lev}).to_dataset(name="test")
ds.attrs["source_id"] = "something"
ds.lev.attrs["units"] = "centimeters"
ds_test = correct_units(ds)
assert ds_test.lev.attrs["units"] == "m"
np.testing.assert_allclose(ds_test.lev.data, ds.lev.data / 100.0)
def test_correct_units_missing():
lev = np.arange(0, 200)
data = np.random.rand(*lev.shape)
ds = xr.DataArray(data, dims=["lev"], coords={"lev": lev}).to_dataset(name="test")
ds.attrs["source_id"] = "something"
# should this raise a warning but pass?
msg = "Unit correction failed with: Cannot convert variables"
with pytest.warns(UserWarning, match=msg):
ds_test = correct_units(ds)
assert "units" not in ds_test.lev.attrs.keys()
def test_maybe_convert_bounds_to_vertex():
# create a ds with bounds
lon = np.arange(0, 10)
lat = np.arange(20, 30)
data = np.random.rand(len(lon), len(lat))
ds = xr.DataArray(
data, dims=["lon", "lat"], coords={"lon": lon, "lat": lat}
).to_dataset(name="test")
for va in ["lon", "lat"]:
ds.coords[va + "_bounds"] = ds[va] + xr.DataArray([-0.01, 0.01], dims=["bnds"])
# create expected dataset
lon_b = xr.ones_like(ds.lat) * ds.coords["lon_bounds"]
lat_b = xr.ones_like(ds.lon) * ds.coords["lat_bounds"]
lon_v = xr.concat(
[lon_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 0, 1, 1]], dim="vertex"
)
lon_v = lon_v.reset_coords(drop=True)
lat_v = xr.concat(
[lat_b.isel(bnds=ii).squeeze(drop=True) for ii in [0, 1, 1, 0]], dim="vertex"
)
lat_v = lat_v.reset_coords(drop=True)
ds_expected = ds.copy()
ds_expected = ds_expected.assign_coords(lon_verticies=lon_v, lat_verticies=lat_v)
xr.testing.assert_identical(ds_expected, maybe_convert_bounds_to_vertex(ds))
# check that datasets that already conform to this are not changed
xr.testing.assert_identical(
ds_expected, maybe_convert_bounds_to_vertex(ds_expected)
)
def test_maybe_convert_vertex_to_bounds():
# create a ds with verticies
lon = np.arange(0, 10)
lat = np.arange(20, 30)
data = np.random.rand(len(lon), len(lat))
ds = xr.DataArray(data, dims=["x", "y"], coords={"x": lon, "y": lat}).to_dataset(
name="test"
)
ds.coords["lon"] = ds.x * xr.ones_like(ds.y)
ds.coords["lat"] = xr.ones_like(ds.x) * ds.y
ds.coords["lon_verticies"] = (
xr.DataArray([-0.1, -0.1, 0.1, 0.1], dims=["vertex"]) + ds["lon"]
)
ds.coords["lat_verticies"] = (
xr.DataArray([-0.1, 0.1, 0.1, -0.1], dims=["vertex"]) + ds["lat"]
)
ds = promote_empty_dims(ds)
# create expected dataset
ds_expected = ds.copy()
for va in ["lon", "lat"]:
ds_expected.coords[va + "_bounds"] = (
xr.DataArray([-0.1, 0.1], dims=["bnds"]) + ds_expected[va]
)
ds_expected = promote_empty_dims(ds_expected)
ds_test = maybe_convert_vertex_to_bounds(ds)
xr.testing.assert_identical(ds_expected, ds_test)
# check that datasets that already conform to this are not changed
xr.testing.assert_identical(
ds_expected, maybe_convert_vertex_to_bounds(ds_expected)
)
assert np.all(ds_test.lon_bounds.diff("bnds") > 0)
assert np.all(ds_test.lat_bounds.diff("bnds") > 0)
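

# The two conversion tests above and test_sort_vertex_order below share a
# vertex ordering convention: lower-left, upper-left, upper-right,
# lower-right. A small worked check of that convention (a sketch, assuming
# maybe_convert_bounds_to_vertex copies bounds into the cell corners):
def test_vertex_order_convention():
    ds = xr.DataArray(
        np.random.rand(2, 2), dims=["x", "y"], coords={"x": [0, 1], "y": [0, 1]}
    ).to_dataset(name="test")
    ds.coords["lon"] = ds.x * xr.ones_like(ds.y)
    ds.coords["lat"] = xr.ones_like(ds.x) * ds.y
    for va in ["lon", "lat"]:
        ds.coords[va + "_bounds"] = ds[va] + xr.DataArray([-0.5, 0.5], dims=["bnds"])
    ds_v = maybe_convert_bounds_to_vertex(ds)
    cell = ds_v.isel(x=0, y=0)
    # vertex 0 (lower-left) sits below and left of vertex 2 (upper-right)
    assert float(cell.lon_verticies.isel(vertex=0)) < float(
        cell.lon_verticies.isel(vertex=2)
    )
    assert float(cell.lat_verticies.isel(vertex=0)) < float(
        cell.lat_verticies.isel(vertex=2)
    )
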
def test_sort_vertex_order():
ordered_points = np.array([[1, 1, 2, 2], [3, 4, 4, 3]]).T
# check every permutation of the points
for order in list(itertools.permutations([0, 1, 2, 3])):
points_scrambled = ordered_points[order, :]
# create xarray
lon_v = xr.DataArray(
points_scrambled[:, 0],
dims=["vertex"],
coords={"x": 0, "y": 0},
name="lon_bounds",
).expand_dims(["x", "y"])
lat_v = xr.DataArray(
points_scrambled[:, 1],
dims=["vertex"],
coords={"x": 0, "y": 0},
name="lat_bounds",
).expand_dims(["x", "y"])
da = (
xr.DataArray([np.nan], coords={"x": 0, "y": 0})
.expand_dims(["x", "y"])
.to_dataset(name="test")
)
da = da.assign_coords({"lon_verticies": lon_v, "lat_verticies": lat_v})
da_sorted = sort_vertex_order(da).squeeze()
new = np.vstack((da_sorted.lon_verticies, da_sorted.lat_verticies)).T
np.testing.assert_allclose(new, ordered_points)
assert da_sorted.lon_verticies.isel(vertex=0) < da_sorted.lon_verticies.isel(
vertex=3
)
assert da_sorted.lon_verticies.isel(vertex=1) < da_sorted.lon_verticies.isel(
vertex=2
)
assert da_sorted.lat_verticies.isel(vertex=0) < da_sorted.lat_verticies.isel(
vertex=1
)
assert da_sorted.lat_verticies.isel(vertex=3) < da_sorted.lat_verticies.isel(
vertex=2
)
# shift the vertex by one and see if the result is the same
da_shift = da.copy()
da_shift = da_shift.assign_coords(vertex=da_shift.vertex + 10)
da_sorted_shift = sort_vertex_order(da_shift).squeeze()
np.testing.assert_allclose(da_sorted_shift.vertex.data, np.arange(4))
def test_fix_metadata():
# Create a dataset with matching attrs
ds = xr.Dataset()
ds.attrs = {
"source_id": "GFDL-CM4",
"experiment_id": "historical",
"branch_time_in_parent": "nonsense",
}
ds_fixed = fix_metadata(ds)
assert ds_fixed.attrs["branch_time_in_parent"] == 91250
# Test that another dataset is untouched
ds = xr.Dataset()
ds.attrs = {
"source_id": "GFDL-CM4",
"experiment_id": "other",
"branch_time_in_parent": "nonsense",
}
ds_fixed = fix_metadata(ds)
assert ds_fixed.attrs["branch_time_in_parent"] == "nonsense"
# Combination test - involving #
@pytest.mark.parametrize("add_coords", [True, False])
@pytest.mark.parametrize("shift", [0, 10])
def test_combined_preprocessing_dropped_coords(add_coords, shift):
"""Check if coordinates are properly dropped"""
# create a 2d dataset
xlen, ylen, zlen = (10, 5, 1)
ds = (
create_test_ds("x", "y", "dummy", xlen, ylen, zlen).squeeze().drop_vars("dummy")
)
x_bnds = xr.concat([ds.x, ds.x], "bnds")
ds = ds.assign_coords(x_bounds=x_bnds)
if add_coords:
ds = ds.assign_coords(bnds=np.arange(len(ds.bnds)) + shift)
ds = combined_preprocessing(ds)
assert "bnds" not in ds.coords
def test_rename_mislabeled_coords():
"""Test if the renaming is applied to datavariables"""
# create a 2d dataset
xlen, ylen, zlen = (10, 5, 3)
ds = create_test_ds("x", "y", "z", xlen, ylen, zlen).squeeze()
ds["nav_lon"] = ds.lon # assign longitude as data variable
ds = ds.drop_vars(["lon"])
ds_pp = rename_cmip6(ds)
np.testing.assert_allclose(ds.nav_lon.data, ds_pp.lon.data)
def test_duplicate_renamed_coordinates():
# create a 2d dataset
xlen, ylen, zlen = (10, 5, 3)
ds = create_test_ds("x", "y", "lev", xlen, ylen, zlen)
ds = ds.drop_vars("lon") # drop the original longitude
# assign two coordinates which should both be renamed according to the renaming dict
coord_da_1 = xr.DataArray(np.random.rand(xlen, ylen), dims=["x", "y"])
coord_da_2 = xr.DataArray(np.random.rand(xlen, ylen), dims=["x", "y"])
ds = ds.assign_coords(longitude=coord_da_1, nav_lon=coord_da_2)
with pytest.warns(
match="While renaming to target `lon`, more than one candidate was found"
):
ds_pp = rename_cmip6(ds)
assert "nav_lon" in ds_pp.coords
    xr.testing.assert_allclose(
        ds_pp.lon.reset_coords(drop=True).drop_vars(["x", "y"]), coord_da_1
    )
def test_renamed_coordinate_exists():
# create a 2d dataset
xlen, ylen, zlen = (10, 5, 3)
ds = create_test_ds("x", "y", "lev", xlen, ylen, zlen)
    # assign a coordinate which should be renamed according to the renaming dict
coord_da = xr.DataArray(np.random.rand(xlen, ylen), dims=["x", "y"])
ds = ds.assign_coords(longitude=coord_da)
ds_pp = rename_cmip6(ds)
# make sure the original lon is intact
xr.testing.assert_allclose(ds_pp.lon, ds.lon)
assert "longitude" in ds_pp
def test_preserve_attrs():
# create a 2d dataset
xlen, ylen, zlen = (10, 5, 1)
ds = (
create_test_ds("x", "y", "dummy", xlen, ylen, zlen).squeeze().drop_vars("dummy")
)
ds.attrs = {"preserve_this": "here"}
    # TODO: there are a bunch of errors if the metadata is not complete.
    # Should they be ignored so the dataset is still returned?
    # For now, fill in complete metadata
for att in EXACT_ATTRS:
ds.attrs[att] = "a"
ds_pp = combined_preprocessing(ds)
assert ds_pp.attrs["preserve_this"] == "here"