Skip to content

Commit

Permalink
[SPARK-43709][PS] Remove closed parameter from ps.date_range & en…
Browse files Browse the repository at this point in the history
…able test

### What changes were proposed in this pull request?

This PR proposes to remove `closed` parameter from `ps.date_range` & enable test. See pandas-dev/pandas#40245 for more detail.

### Why are the changes needed?

To support pandas 2.0.0 and above.

### Does this PR introduce _any_ user-facing change?

`closed` parameter will no longer be available from `ps.date_range` API.

### How was this patch tested?

Enabling the existing UT.

Closes apache#42389 from itholic/closed_removing.

Authored-by: itholic <haejoon.lee@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
itholic authored and vpolet committed Aug 24, 2023
1 parent cbc1181 commit 8371010
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 51 deletions.
1 change: 1 addition & 0 deletions python/docs/source/migration_guide/pyspark_upgrade.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Upgrading from PySpark 3.5 to 4.0
* In Spark 4.0, ``Series.mad`` has been removed from pandas API on Spark.
* In Spark 4.0, ``na_sentinel`` parameter from ``Index.factorize`` and ``Series.factorize`` has been removed from pandas API on Spark, use ``use_na_sentinel`` instead.
* In Spark 4.0, ``inplace`` parameter from ``Categorical.add_categories``, ``Categorical.remove_categories``, ``Categorical.set_categories``, ``Categorical.rename_categories``, ``Categorical.reorder_categories``, ``Categorical.as_ordered``, ``Categorical.as_unordered`` have been removed from pandas API on Spark.
* In Spark 4.0, ``closed`` parameter from ``ps.date_range`` has been removed from pandas API on Spark.


Upgrading from PySpark 3.3 to 3.4
Expand Down
38 changes: 1 addition & 37 deletions python/pyspark/pandas/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,7 +1751,7 @@ def pandas_to_datetime(
)


# TODO(SPARK-42621): Add `inclusive` parameter and replace `closed`.
# TODO(SPARK-42621): Add `inclusive` parameter.
# See https://github.com/pandas-dev/pandas/issues/40245
def date_range(
start: Union[str, Any] = None,
Expand All @@ -1761,7 +1761,6 @@ def date_range(
tz: Optional[Union[str, tzinfo]] = None,
normalize: bool = False,
name: Optional[str] = None,
closed: Optional[str] = None,
**kwargs: Any,
) -> DatetimeIndex:
"""
Expand All @@ -1785,12 +1784,6 @@ def date_range(
Normalize start/end dates to midnight before generating date range.
name : str, default None
Name of the resulting DatetimeIndex.
closed : {None, 'left', 'right'}, optional
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None, the default).
.. deprecated:: 3.4.0
**kwargs
For compatibility. Has no effect on the result.
Expand Down Expand Up @@ -1874,37 +1867,9 @@ def date_range(
DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
'2019-01-31'],
dtype='datetime64[ns]', freq=None)
`closed` controls whether to include `start` and `end` that are on the
boundary. The default includes boundary points on either end.
>>> ps.date_range(
... start='2017-01-01', end='2017-01-04', closed=None
... ) # doctest: +SKIP
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq=None)
Use ``closed='left'`` to exclude `end` if it falls on the boundary.
>>> ps.date_range(
... start='2017-01-01', end='2017-01-04', closed='left'
... ) # doctest: +SKIP
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq=None)
Use ``closed='right'`` to exclude `start` if it falls on the boundary.
>>> ps.date_range(
... start='2017-01-01', end='2017-01-04', closed='right'
... ) # doctest: +SKIP
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq=None)
"""
assert freq not in ["N", "ns"], "nanoseconds is not supported"
assert tz is None, "Localized DatetimeIndex is not supported"
if closed is not None:
warnings.warn(
"Argument `closed` is deprecated in 3.4.0 and will be removed in 4.0.0.",
FutureWarning,
)

return cast(
DatetimeIndex,
Expand All @@ -1917,7 +1882,6 @@ def date_range(
tz=tz,
normalize=normalize,
name=name,
closed=closed,
**kwargs,
)
),
Expand Down
14 changes: 0 additions & 14 deletions python/pyspark/pandas/tests/test_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,6 @@ def test_to_datetime(self):
self.assert_eq(pd.to_datetime(pdf), ps.to_datetime(psdf))
self.assert_eq(pd.to_datetime(dict_from_pdf), ps.to_datetime(dict_from_pdf))

@unittest.skipIf(
LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
"TODO(SPARK-43709): Enable NamespaceTests.test_date_range for pandas 2.0.0.",
)
def test_date_range(self):
self.assert_eq(
ps.date_range(start="1/1/2018", end="1/08/2018"),
Expand Down Expand Up @@ -225,16 +221,6 @@ def test_date_range(self):
pd.date_range(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)),
)

self.assert_eq(
ps.date_range(start="2017-01-01", end="2017-01-04", closed="left"),
pd.date_range(start="2017-01-01", end="2017-01-04", closed="left"),
)

self.assert_eq(
ps.date_range(start="2017-01-01", end="2017-01-04", closed="right"),
pd.date_range(start="2017-01-01", end="2017-01-04", closed="right"),
)

self.assertRaises(
AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, tz="Asia/Tokyo")
)
Expand Down

0 comments on commit 8371010

Please sign in to comment.