Skip to content

Commit

Permalink
BUG: DateTimeIndex.is_year_start unexpected behavior when constructed…
Browse files Browse the repository at this point in the history
… with freq 'MS' date_range (#57377) (#57494)

* Added some comments to where the bug is occurring

* Potential fix, passed all potentially relevant tests

* Very likely fix

* Reverted to previous start/end scheme; added tests

* Added fixes to whatsnew doc

* Removed stray comment

* Fixed alphabetical problem in whatsnew

* add parametric test

* fixup

---------

Co-authored-by: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
  • Loading branch information
mattheeter and MarcoGorelli authored Jun 8, 2024
1 parent f2f298b commit 81a44fa
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ Categorical

Datetimelike
^^^^^^^^^^^^
- Bug in :attr:`is_year_start` where a :class:`DatetimeIndex` constructed via :func:`date_range` with frequency ``'MS'`` wouldn't have the correct year or quarter start attributes (:issue:`57377`)
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -253,9 +253,10 @@ def get_start_end_field(
# month of year. Other offsets use month, startingMonth as ending
# month of year.

if freq_name.lstrip("B")[0:2] in ["MS", "QS", "YS"]:
if freq_name.lstrip("B")[0:2] in ["QS", "YS"]:
end_month = 12 if month_kw == 1 else month_kw - 1
start_month = month_kw

else:
end_month = month_kw
start_month = (end_month % 12) + 1
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def f(self):
month_kw = 12
if freq:
kwds = freq.kwds
month_kw = kwds.get("startingMonth", kwds.get("month", 12))
month_kw = kwds.get("startingMonth", kwds.get("month", month_kw))

if freq is not None:
freq_name = freq.name
Expand Down
95 changes: 95 additions & 0 deletions pandas/tests/indexes/datetimes/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import locale
import unicodedata

from hypothesis import given
import hypothesis.strategies as st
import numpy as np
import pytest

Expand Down Expand Up @@ -329,6 +331,84 @@ def test_dti_is_month_start_custom(self):
with pytest.raises(ValueError, match=msg):
dti.is_month_start

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "MS", 3, np.array([False, True, False])),
        ("2017-12-01", "QS", 3, np.array([True, False, False])),
        ("2017-12-01", "YS", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_year_start(self, timestamp, freq, periods, expected_values):
    """``is_year_start`` of a ``date_range`` result equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_year_start
    tm.assert_numpy_array_equal(actual, expected_values)

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "ME", 3, np.array([True, False, False])),
        ("2017-12-01", "QE", 3, np.array([True, False, False])),
        ("2017-12-01", "YE", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_year_end(self, timestamp, freq, periods, expected_values):
    """``is_year_end`` of a ``date_range`` result equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_year_end
    tm.assert_numpy_array_equal(actual, expected_values)

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "MS", 3, np.array([False, True, False])),
        ("2017-12-01", "QS", 3, np.array([True, True, True])),
        ("2017-12-01", "YS", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_quarter_start(self, timestamp, freq, periods, expected_values):
    """``is_quarter_start`` of a ``date_range`` equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_quarter_start
    tm.assert_numpy_array_equal(actual, expected_values)

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "ME", 3, np.array([True, False, False])),
        ("2017-12-01", "QE", 3, np.array([True, True, True])),
        ("2017-12-01", "YE", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_quarter_end(self, timestamp, freq, periods, expected_values):
    """``is_quarter_end`` of a ``date_range`` equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_quarter_end
    tm.assert_numpy_array_equal(actual, expected_values)

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "MS", 3, np.array([True, True, True])),
        ("2017-12-01", "QS", 3, np.array([True, True, True])),
        ("2017-12-01", "YS", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_month_start(self, timestamp, freq, periods, expected_values):
    """``is_month_start`` of a ``date_range`` equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_month_start
    tm.assert_numpy_array_equal(actual, expected_values)

@pytest.mark.parametrize(
    "timestamp, freq, periods, expected_values",
    [
        ("2017-12-01", "ME", 3, np.array([True, True, True])),
        ("2017-12-01", "QE", 3, np.array([True, True, True])),
        ("2017-12-01", "YE", 3, np.array([True, True, True])),
    ],
)
def test_dti_dr_is_month_end(self, timestamp, freq, periods, expected_values):
    """``is_month_end`` of a ``date_range`` equals the expected array."""
    # GH57377
    dti = date_range(timestamp, periods=periods, freq=freq)
    actual = dti.is_month_end
    tm.assert_numpy_array_equal(actual, expected_values)

def test_dti_is_year_quarter_start_doubledigit_freq(self):
# GH#58523
dr = date_range("2017-01-01", periods=2, freq="10YS")
Expand All @@ -343,3 +423,18 @@ def test_dti_is_year_start_freq_custom_business_day_with_digit(self):
msg = "Custom business days is not supported by is_year_start"
with pytest.raises(ValueError, match=msg):
dr.is_year_start


@given(
    dt=st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1)),
    n=st.integers(min_value=1, max_value=10),
    freq=st.sampled_from(["MS", "QS", "YS"]),
)
@pytest.mark.slow
def test_against_scalar_parametric(freq, dt, n):
    """Index-level ``is_year_start`` must agree with each element's attribute.

    Builds a three-element ``date_range`` starting at ``dt`` whose frequency
    string is the multiple ``n`` prepended to ``freq``, then compares the
    index-level result against the per-element values.
    """
    # https://github.com/pandas-dev/pandas/issues/49606
    dti = date_range(dt, periods=3, freq=f"{n}{freq}")
    from_index = list(dti.is_year_start)
    from_elements = [ts.is_year_start for ts in dti]
    assert from_index == from_elements

0 comments on commit 81a44fa

Please sign in to comment.