Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fixes weekday for dates before 1752 #53795

Merged
merged 13 commits into from
Jun 27, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Bug in :meth:`Timestamp.weekday` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
- Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
- For external ExtensionArray implementations, restored the default use of ``_values_for_factorize`` for hashing arrays (:issue:`53475`)
Expand Down
45 changes: 33 additions & 12 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""
Cython implementations of functions resembling the stdlib calendar module
"""

cimport cython
from numpy cimport (
int32_t,
Expand All @@ -19,7 +18,7 @@ cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
cdef int* em = [0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
Expand Down Expand Up @@ -76,11 +75,22 @@ cpdef int32_t get_days_in_month(int year, Py_ssize_t month) noexcept nogil:

@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
@cython.cdivision(True)
cdef long quot(long a , long b) noexcept nogil:
    """
    Return the integer quotient of ``a`` by ``b``, lowered by one when ``a``
    is negative and the division leaves a non-zero remainder.

    NOTE(review): presumably compiled with C division semantics (truncation
    toward zero) and called with a positive ``b``, in which case the result
    is the floor of ``a / b`` -- confirm against the decorators and callers.
    """
    cdef long q = a / b

    # Only a negative dividend with a leftover remainder needs adjusting.
    if a < 0 and a % b != 0:
        q -= 1
    return q


@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
cdef int dayofweek(int y, int m, int d) noexcept nogil:
"""
Find the day of week for the date described by the Y/M/D triple y, m, d
using Sakamoto's method, from wikipedia.
using Gauss' method, from wikipedia.

0 represents Monday. See [1]_.

Expand All @@ -103,16 +113,27 @@ cdef int dayofweek(int y, int m, int d) noexcept nogil:
[1] https://docs.python.org/3/library/calendar.html#calendar.weekday

[2] https://en.wikipedia.org/wiki/\
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
Determination_of_the_day_of_the_week#Gauss's_algorithm
"""
# Note: this particular implementation comes from
# http://berndt-schwerdtfeger.de/wp-content/uploads/pdf/cal.pdf
cdef:
int day

y -= m < 3
day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7
# convert to python day
return (day + 6) % 7

long c
int g
int f
int e

if (m < 3):
y -= 1

c = quot(y, 100)
g = y - c * 100
f = 5 * (c - quot(c, 4) * 4)
e = em[m]

if (m > 2):
e -= 1
return (-1 + d + e + f + g + g/4) % 7

cdef bint is_leapyear(int64_t year) noexcept nogil:
"""
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
tzlocal,
tzutc,
)
from hypothesis import (
given,
strategies as st,
)
import numpy as np
import pytest
import pytz
Expand Down Expand Up @@ -223,6 +227,39 @@ def test_resolution(self):
assert dt.as_unit("ms").resolution == Timedelta(milliseconds=1)
assert dt.as_unit("s").resolution == Timedelta(seconds=1)

@pytest.mark.parametrize(
    "date_string, expected",
    [
        ("0000-2-29", 1),
        ("0000-3-1", 2),
        ("1582-10-14", 3),
        ("-0040-1-1", 4),
        ("2023-06-18", 6),
    ],
)
def test_dow_historic(self, date_string, expected):
    """Check ``Timestamp.weekday`` against known values for selected dates."""
    # GH 53738
    # Cases include year 0, a negative year and a present-day date.
    assert Timestamp(date_string).weekday() == expected

@given(
    ts=st.datetimes(),
    sign=st.sampled_from(["-", ""]),
)
def test_dow_parametric(self, ts, sign):
    """Compare ``Timestamp.weekday`` with a day count derived from NumPy."""
    # GH 53738
    # Build a zero-padded ISO-style date string, optionally with a leading
    # minus sign so that negative years are exercised as well.
    date_string = f"{sign}{ts.year:04d}-{ts.month:02d}-{ts.day:02d}"

    result = Timestamp(date_string).weekday()

    # Whole days between the date and 1970-01-01, as a plain integer.
    epoch = np.datetime64("1970-01-01")
    days_since_epoch = (np.datetime64(date_string) - epoch).astype("int64")

    # 1970-01-01 was a Thursday (weekday 3); subtracting 4 is the same as
    # adding 3 modulo 7, which maps the day count onto Monday=0 numbering.
    expected = (days_since_epoch - 4) % 7
    assert result == expected


class TestTimestamp:
def test_default_to_stdlib_utc(self):
Expand Down