From 2a0177902210d3cb5ce982f9cc114c5abb852f30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 31 Jan 2024 18:08:33 +0100
Subject: [PATCH 01/14] Pin pytest for now.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 47ca719bf..bd4bdc871 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@
         'nbsmoke[verify] >0.5',
         'netcdf4',
         'pyarrow',
-        'pytest',
+        'pytest <8',  # Fails lint with IPynbFile is deprecated
         'pytest-benchmark',
         'pytest-cov',
         'rasterio',

From af10be7a099b2eb777979ac3259f516e01de6984 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 31 Jan 2024 18:18:32 +0100
Subject: [PATCH 02/14] Update to ddf = ddf.repartition(npartitions=npartitions)

---
 datashader/tests/test_dask.py | 114 +++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py
index 38778c039..2c84021d1 100644
--- a/datashader/tests/test_dask.py
+++ b/datashader/tests/test_dask.py
@@ -124,7 +124,7 @@ def test_gpu_dependencies():
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_count(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(np.array([[5, 5], [5, 5]], dtype='i4'),
                        coords=coords, dims=dims)
@@ -140,7 +140,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_any(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(np.array([[True, True], [True, True]]),
                        coords=coords, dims=dims)
@@ -155,7 +155,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_sum(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i32).reshape((2, 2, 5)).sum(axis=2, dtype='f8').T,
@@ -173,7 +173,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_first(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[0, 10], [5, 15]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds.first('i32')), out)
@@ -185,7 +185,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_last(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[4, 14], [9, 19]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds.last('i32')), out)
@@ -197,7 +197,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_min(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i64).reshape((2, 2, 5)).min(axis=2).astype('f8').T,
@@ -211,7 +211,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_max(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i64).reshape((2, 2, 5)).max(axis=2).astype('f8').T,
@@ -225,7 +225,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_min_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[0, 10], [5, 15]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds._min_row_index()), out)
@@ -234,7 +234,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_max_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[4, 14], [9, 19]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds._max_row_index()), out)
@@ -243,7 +243,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_min_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[-3, -1, 0, 4, nan, nan], [-13, -11, 10, 12, 14, nan]],
                          [[-9, -7, -5, 6, 8, nan], [-19, -17, -15, 16, 18, nan]]])
@@ -258,7 +258,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_max_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[4, 0, -1, -3, nan, nan], [14, 12, 10, -11, -13, nan]],
                          [[8, 6, -5, -7, -9, nan], [18, 16, -15, -17, -19, nan]]])
@@ -273,7 +273,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_min_n_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[0, 1, 2, 3, 4, -1], [10, 11, 12, 13, 14, -1]],
                          [[5, 6, 7, 8, 9, -1], [15, 16, 17, 18, 19, -1]]])
@@ -288,7 +288,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_max_n_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[4, 3, 2, 1, 0, -1], [14, 13, 12, 11, 10, -1]],
                          [[9, 8, 7, 6, 5, -1], [19, 18, 17, 16, 15, -1]]])
@@ -303,7 +303,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_first_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[0, -1, -3, 4, nan, nan], [10, -11, 12, -13, 14, nan]],
                          [[-5, 6, -7, 8, -9, nan], [-15, 16, -17, 18, -19, nan]]])
@@ -319,7 +319,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_last_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[4, -3, -1, 0, nan, nan], [14, -13, 12, -11, 10, nan]],
                          [[-9, 8, -7, 6, -5, nan], [-19, 18, -17, 16, -15, nan]]])
@@ -334,7 +334,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_count(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[2, 1, 1, 1], [1, 1, 2, 1]], [[1, 2, 1, 1], [1, 1, 1, 2]]], dtype=np.uint32)
     assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2')).data, sol)
@@ -347,7 +347,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_min(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_int = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]],
                         [[8, 5, 6, 7], [16, 17, 18, 15]]], dtype=np.float64)
@@ -361,7 +361,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_max(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_int = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]],
                         [[8, 9, 6, 7], [16, 17, 18, 19]]], dtype=np.float64)
@@ -375,7 +375,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_min_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[[0, 4, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                           [[12, nan, nan], [13, nan, nan], [10, 14, nan], [11, nan, nan]]],
@@ -393,7 +393,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_max_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[[4, 0, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                           [[12, nan, nan], [13, nan, nan], [14, 10, nan], [11, nan, nan]]],
@@ -411,7 +411,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_min_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]])
     agg = c.points(ddf, 'x', 'y', ds.by('cat2', ds._min_row_index()))
@@ -421,7 +421,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_max_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]])
     agg = c.points(ddf, 'x', 'y', ds.by('cat2', ds._max_row_index()))
@@ -431,7 +431,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_min_n_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[[0, 4, -1], [1, -1, -1], [2, -1, -1], [3, -1, -1]],
                           [[12, -1, -1], [13, -1, -1], [10, 14, -1], [11, -1, -1]]],
@@ -449,7 +449,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_max_n_row_index(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[[4, 0, -1], [1, -1, -1], [2, -1, -1], [3, -1, -1]],
                           [[12, -1, -1], [13, -1, -1], [14, 10, -1], [11, -1, -1]]],
@@ -467,7 +467,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_first(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[0, -1, nan, -3],
                           [12, -13, 10, -11]],
@@ -481,7 +481,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_last(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[4, -1, nan, -3],
                           [12, -13, 14, -11]],
@@ -495,7 +495,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_first_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     solution = np.array([[[[0, 4, nan], [-1, nan, nan], [nan, nan, nan], [-3, nan, nan]],
                           [[12, nan, nan], [-13, nan, nan], [10, 14, nan], [-11, nan, nan]]],
@@ -513,7 +513,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_last_n(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
    assert ddf.npartitions == npartitions
     solution = np.array([[[[4, 0, nan], [-1, nan, nan], [nan, nan, nan], [-3, nan, nan]],
                           [[12, nan, nan], [-13, nan, nan], [14, 10, nan], [-11, nan, nan]]],
@@ -533,7 +533,7 @@
 def test_where_max(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[16, 6], [11, 1]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.max('i32'), 'reverse')), out)
@@ -554,7 +554,7 @@
 def test_where_min(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray([[20, 10], [15, 5]], coords=coords, dims=dims)
     assert_eq_xr(c.points(ddf, 'x', 'y', ds.where(ds.min('i32'), 'reverse')), out)
@@ -575,7 +575,7 @@
 def test_where_max_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = np.array([[[ 4, 0, 1, 3, -1, -1],
                               [14, 12, 10, 11, 13, -1]],
@@ -608,7 +608,7 @@
 def test_where_min_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = np.array([[[3, 1, 0, 4, -1, -1],
                               [13, 11, 10, 12, 14, -1]],
@@ -639,7 +639,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_where_first(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     # Note reductions like ds.where(ds.first('i32'), 'reverse') are supported,
     # but the same results can be achieved using the simpler ds.first('reverse')
@@ -660,7 +660,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_where_last(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     # Note reductions like ds.where(ds.last('i32'), 'reverse') are supported,
     # but the same results can be achieved using the simpler ds.last('reverse')
@@ -683,7 +683,7 @@
 def test_where_first_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = np.array([[[ 0, 1, 3, 4, -1, -1],
                               [10, 11, 12, 13, 14, -1]],
@@ -716,7 +716,7 @@
 def test_where_last_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = np.array([[[ 4, 3, 1, 0, -1, -1],
                               [14, 13, 12, 11, 10, -1]],
@@ -747,7 +747,7 @@
 @pytest.mark.parametrize('ddf', [_ddf])
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_summary_by(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
 
     # summary(by)
@@ -785,7 +785,7 @@
 def test_summary_where_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_min_n_rowindex = np.array([[[3, 1, 0, 4, -1],
                                     [13, 11, 10, 12, 14]],
@@ -821,7 +821,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_mean(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i32).reshape((2, 2, 5)).mean(axis=2, dtype='f8').T,
@@ -838,7 +838,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_var(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i32).reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
@@ -855,7 +855,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_std(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     out = xr.DataArray(
         values(df_pd.i32).reshape((2, 2, 5)).std(axis=2, dtype='f8').T,
@@ -872,7 +872,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_count_cat(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[5, 0, 0, 0],
                      [0, 0, 5, 0]],
@@ -930,7 +930,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_sum(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[ 10, nan, nan, nan],
                      [nan, nan, 60, nan]],
@@ -977,7 +977,7 @@ def test_categorical_sum_binning(ddf, npartitions):
         pytest.skip(
             "The categorical binning of 'sum' reduction is yet supported on the GPU"
         )
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[8.0, nan, nan, nan],
                      [nan, nan, 60.0, nan]],
@@ -1000,7 +1000,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_mean(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[ 2, nan, nan, nan],
                      [nan, nan, 12, nan]],
@@ -1036,7 +1036,7 @@ def test_categorical_mean_binning(ddf, npartitions):
         pytest.skip(
             "The categorical binning of 'mean' reduction is yet supported on the GPU"
        )
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[ 2, nan, nan, nan],
                      [nan, nan, 12, nan]],
@@ -1057,7 +1057,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_var(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.array([[[ 2.5, nan, nan, nan],
                      [ nan, nan, 2., nan]],
@@ -1099,7 +1099,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_categorical_std(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol = np.sqrt(np.array([
         [[ 2.5, nan, nan, nan],
@@ -1143,7 +1143,7 @@
 @pytest.mark.parametrize('ddf', ddfs)
 @pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
 def test_multiple_aggregates(ddf, npartitions):
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
 
     agg = c.points(ddf, 'x', 'y', ds.summary(f64_std=ds.std('f64'),
@@ -2365,7 +2365,7 @@ def test_canvas_size():
 def test_dataframe_dtypes(ddf, npartitions):
     # Issue #1235.
     ddf['dates'] = pd.Series(['2007-07-13']*20, dtype='datetime64[ns]')
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
 
     ds.Canvas(2, 2).points(ddf, 'x', 'y', ds.count())
@@ -2408,7 +2408,7 @@ def test_dask_categorical_counts(on_gpu):
 def test_categorical_where_max(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray([[[4, 1, -1, 3], [12, 13, 14, 11]],
                                  [[8, 5, 6, 7], [16, 17, 18, 15]]],
@@ -2430,7 +2430,7 @@
 def test_categorical_where_min(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray([[[0, 1, -1, 3], [12, 13, 10, 11]],
                                  [[8, 9, 6, 7], [16, 17, 18, 19]]],
@@ -2452,7 +2452,7 @@
 def test_categorical_where_first(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray([[[0, 1, -1, 3], [12, 13, 10, 11]],
                                  [[8, 5, 6, 7], [16, 17, 18, 15]]],
@@ -2474,7 +2474,7 @@
 def test_categorical_where_last(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray([[[4, 1, -1, 3], [12, 13, 14, 11]],
                                  [[8, 9, 6, 7], [16, 17, 18, 19]]],
@@ -2496,7 +2496,7 @@
 def test_categorical_where_max_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray(
         [[[[4, 0, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]],
@@ -2534,7 +2534,7 @@
 def test_categorical_where_min_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray(
         [[[[0, 4, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]],
@@ -2572,7 +2572,7 @@
 def test_categorical_where_first_n(ddf, npartitions):
     # Important to test with npartitions > 2 to have multiple combination stages.
     # Identical results to equivalent pandas test.
-    ddf = ddf.repartition(npartitions)
+    ddf = ddf.repartition(npartitions=npartitions)
     assert ddf.npartitions == npartitions
     sol_rowindex = xr.DataArray(
         [[[[0, 4, -1], [1, -1, -1], [-1, -1, -1], [3, -1, -1]],
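A note on PATCH 02 above: in dask.dataframe the first positional parameter of
DataFrame.repartition() is `divisions`, not `npartitions`, so passing the
partition count positionally leans on an integer special case rather than the
documented keyword API. A minimal sketch of the keyword form the patch
standardizes on (the toy dataframe is illustrative, not taken from the test
suite):

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({'x': range(8), 'y': range(8)})
    ddf = dd.from_pandas(pdf, npartitions=4)

    # repartition(divisions=None, npartitions=None, ...): an integer partition
    # count must be passed by keyword so it is not read as `divisions`.
    ddf2 = ddf.repartition(npartitions=2)
    assert ddf2.npartitions == 2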
From 38e6964d9149c845120bf859cd300bfb7b8052fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 31 Jan 2024 18:25:43 +0100
Subject: [PATCH 03/14] Ignore test_ffill_limit_area

---
 datashader/tests/test_datatypes.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 243cee870..efa945846 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -873,6 +873,11 @@ def test_fillna_no_op_returns_copy(self):
     def test_fillna_series_method(self):
         pass
 
+    @pytest.mark.skip(reason="Can't fill with nested sequences")
+    def test_ffill_limit_area(self):
+        # Added in Pandas 2.2
+        pass
+
 
 class TestRaggedReshaping(eb.BaseReshapingTests):
     @pytest.mark.skip(reason="__setitem__ not supported")

From 6fb8e3d125f975c319884625474ee79f5238dc70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 31 Jan 2024 19:06:13 +0100
Subject: [PATCH 04/14] Work around Pandas 2.2 changes for ExtensionArray tests

---
 datashader/tests/test_datatypes.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index efa945846..591ce37f0 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -835,6 +835,12 @@ def test_sort_values_frame(self):
     def test_where_series(self):
         pass
 
+    @pytest.mark.xfail(reason="not currently supported")
+    def test_duplicated(self):
+        # Added in Pandas 2.2
+        # https://github.com/pandas-dev/pandas/pull/55255
+        pass
+
 
 class TestRaggedPrinting(eb.BasePrintingTests):
     @pytest.mark.skip(reason="Can't autoconvert ragged array to numpy array")
     def test_dataframe_repr(self):
         pass
@@ -891,3 +897,11 @@ def test_transpose(self):
     @pytest.mark.skip(reason="transpose with numpy array elements seems not supported")
     def test_transpose_frame(self):
         pass
+
+    @pytest.mark.skipif(Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2")
+    def test_merge_on_extension_array(self, data):
+        super().test_merge_on_extension_array(data)
+
+    @pytest.mark.skipif(Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2")
+    def test_merge_on_extension_array_duplicates(self, data):
+        super().test_merge_on_extension_array_duplicates(data)
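PATCH 03 and PATCH 04 both opt out of tests inherited from pandas'
extension-array conformance suite; the `eb` alias in these diffs appears to be
pandas' `pandas.tests.extension.base` module. The pattern is
subclass-and-override; a sketch with an invented class name (datashader's real
subclasses wrap RaggedArray):

    import pytest
    from pandas.tests.extension import base as eb  # assumed import behind `eb`

    class TestMyArrayMethods(eb.BaseMethodsTests):
        # Opt out of an inherited conformance test the extension type
        # cannot satisfy; the override shadows the base implementation.
        @pytest.mark.skip(reason="not supported by this extension array")
        def test_sort_values_frame(self):
            pass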
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Wed, 31 Jan 2024 19:18:01 +0100
Subject: [PATCH 05/14] Fix lint

---
 datashader/tests/test_datatypes.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 591ce37f0..6a5378a24 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -898,10 +898,14 @@ def test_transpose(self):
     def test_transpose_frame(self):
         pass
 
-    @pytest.mark.skipif(Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2")
+    @pytest.mark.skipif(
+        Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2"
+    )
     def test_merge_on_extension_array(self, data):
         super().test_merge_on_extension_array(data)
 
-    @pytest.mark.skipif(Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2")
+    @pytest.mark.skipif(
+        Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2"
+    )
     def test_merge_on_extension_array_duplicates(self, data):
         super().test_merge_on_extension_array_duplicates(data)

From 5468a0d94b028e9bd8de95b16a4f501f5944372e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 09:12:22 +0100
Subject: [PATCH 06/14] Move pytest settings to pyproject.toml

---
 pyproject.toml | 15 +++++++++++++++
 tox.ini        |  8 --------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0528dfc7c..b74125734 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,3 +18,18 @@ target-version = "py39"
 
 [tool.ruff.per-file-ignores]
 "test_mpl_ext.py" = ["E402"]  # Module level import not at top of file
+
+[tool.pytest.ini_options]
+addopts = ["--pyargs", "--doctest-modules", "--doctest-ignore-import-errors", "--strict-config", "--strict-markers", "--color=yes"]
+norecursedirs = 'doc .git dist build _build .ipynb_checkpoints'
+minversion = "7"
+xfail_strict = true
+log_cli_level = "INFO"
+nbsmoke_skip_run = [  # skipping any notebooks that require extra deps
+    '.*tiling.ipynb$',
+    '.*streaming-aggregation.ipynb$',
+    '.*8_Geography.ipynb$',
+]
+filterwarnings = [
+    "ignore:Passing a (SingleBlockManager|BlockManager) to (Series|GeoSeries|DataFrame|GeoDataFrame) is deprecated:DeprecationWarning",  # https://github.com/holoviz/spatialpandas/issues/137
+]

diff --git a/tox.ini b/tox.ini
index 678860750..94e305b02 100644
--- a/tox.ini
+++ b/tox.ini
@@ -71,14 +71,6 @@ deps =
     unit: {[_unit]deps}
     examples_extra: {[_examples_extra]deps}
     all: {[_all]deps}
-
-[pytest]
-addopts = -v --pyargs --doctest-modules --doctest-ignore-import-errors
-norecursedirs = doc .git dist build _build .ipynb_checkpoints
-# skipping any notebooks that require extra deps
-nbsmoke_skip_run = .*tiling.ipynb$
-    .*streaming-aggregation.ipynb$
-    .*8_Geography.ipynb$
 
 [flake8]
 include = *.py
 # run_tests.py is generated by conda build, which appears to have a
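One easy-to-misread detail of the config migrated in PATCH 06: each
`filterwarnings` entry follows the `action:message:category:module` grammar of
Python's -W option, and the message part is matched as a regular expression
against the start of the warning text. The spatialpandas entry above is
therefore roughly equivalent to this programmatic filter (shortened for
illustration):

    import warnings

    # Illustrative equivalent of the pyproject entry; pytest installs such a
    # filter around each test via the warnings module.
    warnings.filterwarnings(
        "ignore",
        message=r"Passing a (SingleBlockManager|BlockManager) to",
        category=DeprecationWarning,
    )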
From bc9c4095453559481d9e6ab8ffd45b771bb487b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 09:38:20 +0100
Subject: [PATCH 07/14] Fix misc warnings in test suite

---
 datashader/tests/test_geopandas.py | 6 +++---
 datashader/tests/test_pandas.py    | 3 ++-
 datashader/tests/test_pipeline.py  | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/datashader/tests/test_geopandas.py b/datashader/tests/test_geopandas.py
index 29b04b8b1..a039dddf3 100644
--- a/datashader/tests/test_geopandas.py
+++ b/datashader/tests/test_geopandas.py
@@ -200,7 +200,7 @@ def test_lines_spatialpandas(geom_type, explode, use_boundary, npartitions):
 def test_points_geopandas(geom_type):
     df = geopandas.read_file(geodatasets.get_path("nybb"))
 
-    df["geometry"] = df["geometry"].sample_points(100, seed=93814)  # multipoint
+    df["geometry"] = df["geometry"].sample_points(100, rng=93814)  # multipoint
     if geom_type == "point":
         df = df.explode(index_parts=False)  # Multipoint -> point.
         unique_geom_type = df["geometry"].geom_type.unique()
@@ -218,7 +218,7 @@
 def test_points_dask_geopandas(geom_type, npartitions):
     df = geopandas.read_file(geodatasets.get_path("nybb"))
 
-    df["geometry"] = df["geometry"].sample_points(100, seed=93814)  # multipoint
+    df["geometry"] = df["geometry"].sample_points(100, rng=93814)  # multipoint
     if geom_type == "point":
         df = df.explode(index_parts=False)  # Multipoint -> point.
         unique_geom_type = df["geometry"].geom_type.unique()
@@ -240,7 +240,7 @@
 def test_points_spatialpandas(geom_type, npartitions):
     df = geopandas.read_file(geodatasets.get_path("nybb"))
 
-    df["geometry"] = df["geometry"].sample_points(100, seed=93814)  # multipoint
+    df["geometry"] = df["geometry"].sample_points(100, rng=93814)  # multipoint
     if geom_type == "point":
         df = df.explode(index_parts=False)  # Multipoint -> point.
         unique_geom_type = df["geometry"].geom_type.unique()

diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index 9ea724e2f..2133e5cdd 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -2734,7 +2734,8 @@ def test_line_antialias():
     numerator = np.nan_to_num(line_antialias_sol_0_intersect) + np.nan_to_num(line_antialias_sol_1)
     denom = np.nan_to_num(line_antialias_sol_count_ignore_aa_0) + \
         np.nan_to_num(line_antialias_sol_count_ignore_aa_1)
-    sol = 3*numerator / denom
+    with np.errstate(invalid='ignore'):  # Dividing by zero is expected
+        sol = 3*numerator / denom
     assert_eq_ndarray(agg.data, sol, close=True)
 
     agg = cvs.line(agg=ds._min_row_index(), **kwargs)

diff --git a/datashader/tests/test_pipeline.py b/datashader/tests/test_pipeline.py
index c099524c4..c8fbc5938 100644
--- a/datashader/tests/test_pipeline.py
+++ b/datashader/tests/test_pipeline.py
@@ -9,7 +9,7 @@
 df = pd.DataFrame({'x': np.array(([0.] * 10 + [1] * 10)),
                    'y': np.array(([0.] * 5 + [1] * 5 + [0] * 5 + [1] * 5)),
                    'f64': np.arange(20, dtype='f8')})
-df.f64.iloc[2] = np.nan
+df.loc[2, 'f64'] = np.nan
 
 cvs = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1))
 cvs10 = ds.Canvas(plot_width=10, plot_height=10, x_range=(0, 1), y_range=(0, 1))
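The np.errstate() change in PATCH 07 deserves a gloss: the antialiasing
solution intentionally divides zero by zero for empty pixels, which would
otherwise make NumPy emit "RuntimeWarning: invalid value encountered in
divide". The context manager suppresses only that class of floating-point
error, and only inside the block. A standalone illustration with invented
arrays:

    import numpy as np

    numerator = np.array([0.0, 6.0])
    denom = np.array([0.0, 2.0])

    with np.errstate(invalid='ignore'):  # 0/0 -> nan, without a RuntimeWarning
        sol = 3 * numerator / denom

    print(sol)  # [nan  9.]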
From 51997fceec62e4237d1473cc89686a2170880aaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 10:17:02 +0100
Subject: [PATCH 08/14] Make xfail run

---
 datashader/tests/test_datatypes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 6a5378a24..9f006a3d4 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -836,10 +836,10 @@ def test_where_series(self):
         pass
 
     @pytest.mark.xfail(reason="not currently supported")
-    def test_duplicated(self):
+    def test_duplicated(self, data):
         # Added in Pandas 2.2
         # https://github.com/pandas-dev/pandas/pull/55255
-        pass
+        super().test_duplicated(data)
 
 
 class TestRaggedPrinting(eb.BasePrintingTests):
     @pytest.mark.skip(reason="Can't autoconvert ragged array to numpy array")
     def test_dataframe_repr(self):
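PATCH 08's subject is terse; the point is that an xfail-marked test whose body
is just `pass` passes trivially, and with `xfail_strict = true` (set in
PATCH 06) an unexpected pass is itself reported as a failure. The override
therefore has to call through to the inherited test so the unsupported code
path really runs. Schematically:

    import pytest

    @pytest.mark.xfail(reason="not currently supported")
    def test_feature():
        # With xfail_strict enabled this body must exercise the unsupported
        # path; a bare `pass` would XPASS and be reported as a failure.
        raise NotImplementedError("feature under test")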
From 001446ab897e4d5bbed1857d8b20ae87181cb4ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 10:18:42 +0100
Subject: [PATCH 09/14] Try commenting out fastparquet to avoid slow Windows solve

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bd4bdc871..946b0eb5b 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@
     'tests': geopandas + [
         'codecov',
         'geodatasets',
-        'fastparquet',  # optional dependency
+        # 'fastparquet',  # optional dependency
         'flake8',
         'nbconvert',
         'nbformat',

From db5a9027c6f56e6d85c9bad6eaade1b86693d05a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 12:06:31 +0100
Subject: [PATCH 10/14] Update nbsmoke_skip_run

---
 pyproject.toml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b74125734..8e26d6b15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,11 +25,8 @@ norecursedirs = 'doc .git dist build _build .ipynb_checkpoints'
 minversion = "7"
 xfail_strict = true
 log_cli_level = "INFO"
-nbsmoke_skip_run = [  # skipping any notebooks that require extra deps
-    '.*tiling.ipynb$',
-    '.*streaming-aggregation.ipynb$',
-    '.*8_Geography.ipynb$',
-]
+# skipping any notebooks that require extra deps
+nbsmoke_skip_run = ".*tiling.ipynb$\n.*streaming-aggregation.ipynb$\n.*8_Geography.ipynb$"
 filterwarnings = [
     "ignore:Passing a (SingleBlockManager|BlockManager) to (Series|GeoSeries|DataFrame|GeoDataFrame) is deprecated:DeprecationWarning",  # https://github.com/holoviz/spatialpandas/issues/137
 ]

From 03adc9a6e26fa4d539325817038f1f74585b9b46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3%B8xbro=20Hansen?=
Date: Thu, 1 Feb 2024 12:08:23 +0100
Subject: [PATCH 11/14] Update setup.py

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 946b0eb5b..55cc24378 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,6 @@
     'tests': geopandas + [
         'codecov',
         'geodatasets',
-        # 'fastparquet',  # optional dependency
         'flake8',
         'nbconvert',
         'nbformat',

From b2ff68c67c9ba4fc6874b5f14b0f01167a86b514 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Fri, 9 Feb 2024 13:43:32 +0100
Subject: [PATCH 12/14] Ignore dask-geopandas warning for now

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 8e26d6b15..83954127c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,4 +29,5 @@ log_cli_level = "INFO"
 nbsmoke_skip_run = ".*tiling.ipynb$\n.*streaming-aggregation.ipynb$\n.*8_Geography.ipynb$"
 filterwarnings = [
     "ignore:Passing a (SingleBlockManager|BlockManager) to (Series|GeoSeries|DataFrame|GeoDataFrame) is deprecated:DeprecationWarning",  # https://github.com/holoviz/spatialpandas/issues/137
+    "ignore:Accessing the underlying geometries through the `.data`:DeprecationWarning:dask_geopandas.core",  # https://github.com/geopandas/dask-geopandas/issues/264
 ]

From b466c2fa3b6e849a53aa80580e31714e8a4e8aed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Sat, 10 Feb 2024 10:12:17 +0100
Subject: [PATCH 13/14] Update skipif to only be valid for pandas 2.2.0

---
 datashader/tests/test_datatypes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datashader/tests/test_datatypes.py b/datashader/tests/test_datatypes.py
index 9f006a3d4..7cbd891ad 100644
--- a/datashader/tests/test_datatypes.py
+++ b/datashader/tests/test_datatypes.py
@@ -899,13 +899,13 @@ def test_transpose_frame(self):
         pass
 
     @pytest.mark.skipif(
-        Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2"
+        Version(pd.__version__) == Version("2.2.0"), reason="Regression in Pandas 2.2"
     )
     def test_merge_on_extension_array(self, data):
         super().test_merge_on_extension_array(data)
 
     @pytest.mark.skipif(
-        Version(pd.__version__) >= Version("2.2"), reason="Stopped working in Pandas 2.2"
+        Version(pd.__version__) == Version("2.2.0"), reason="Regression in Pandas 2.2"
     )
     def test_merge_on_extension_array_duplicates(self, data):
         super().test_merge_on_extension_array_duplicates(data)

From 8d3ada093ff5e84c68236c1cb0bdf8f75a364dba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?=
Date: Mon, 12 Feb 2024 12:01:04 +0100
Subject: [PATCH 14/14] Add NotImplementedError for duplicated.

---
 datashader/datatypes.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/datashader/datatypes.py b/datashader/datatypes.py
index 11e0f356a..f3d13b6e8 100644
--- a/datashader/datatypes.py
+++ b/datashader/datatypes.py
@@ -649,6 +649,10 @@ def __array__(self, dtype=None):
         dtype = np.dtype(object) if dtype is None else np.dtype(dtype)
         return np.asarray(self.tolist(), dtype=dtype)
 
+    def duplicated(self, *args, **kwargs):
+        msg = "duplicated is not implemented for RaggedArray"
+        raise NotImplementedError(msg)
+
 
 @jit(nopython=True, nogil=True)
 def _eq_ragged_ragged(start_indices1,
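A closing note on the pin in PATCH 13, assuming the `Version` used in that file
is `packaging.version.Version`: PEP 440 treats trailing zeros in the release
segment as insignificant, so the equality pin still matches a bare "2.2"
version string while correctly excluding later bugfix releases:

    from packaging.version import Version

    assert Version("2.2.0") == Version("2.2")    # trailing zeros are insignificant
    assert Version("2.2.1") != Version("2.2.0")  # 2.2.1+ runs the tests again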