From 75f978a1d4c6e443e9bf693304ffc700365503d9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Aug 2017 14:14:51 -0700 Subject: [PATCH 1/5] Remove unnecessary usage of _TSObject --- pandas/_libs/tslib.pyx | 43 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 32b8c92a50269..a4c440afe1bac 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -705,7 +705,6 @@ class Timestamp(_Timestamp): pandas_datetimestruct dts int64_t value object _tzinfo, result, k, v - _TSObject ts # set to naive if needed _tzinfo = self.tzinfo @@ -1181,18 +1180,12 @@ cdef class _Timestamp(datetime): If warn=True, issue a warning if nanoseconds is nonzero. """ - cdef: - pandas_datetimestruct dts - _TSObject ts - if self.nanosecond != 0 and warn: warnings.warn("Discarding nonzero nanoseconds in conversion", UserWarning, stacklevel=2) - ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) - dts = ts.dts - return datetime(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, ts.tzinfo) + return datetime(self.year, self.month, self.day, + self.hour, self.min, self.sec, + self.us, self.tzinfo) cpdef to_datetime64(self): """ Returns a numpy.datetime64 object with 'ns' precision """ @@ -4694,7 +4687,6 @@ def get_date_field(ndarray[int64_t] dtindex, object field): field and return an array of these values. """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[int32_t] out ndarray[int32_t, ndim=2] _month_offset @@ -4876,7 +4868,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, (defined by frequency). """ cdef: - _TSObject ts Py_ssize_t i int count = 0 bint is_business = 0 @@ -4925,9 +4916,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): out[i] = 1 @@ -4951,13 +4941,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2)): @@ -4986,9 +4975,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - start_month) % 3 == 0) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5013,13 +5001,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - end_month) % 3 == 0) and ( (ldom == doy and dow < 5) or ( @@ -5049,9 +5036,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dts.month == start_month) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5076,12 +5062,11 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) dom = dts.day mo_off = _month_offset[isleap, dts.month - 1] doy = mo_off + dom - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) ldom = _month_offset[isleap, dts.month] if (dts.month == end_month) and ( @@ -5095,7 +5080,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -5117,7 +5101,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): name based on requested field (e.g. weekday_name) """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[object] out pandas_datetimestruct dts @@ -5143,10 +5126,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None, 0, 0) - return ts_dayofweek(ts) - cdef int64_t DAY_NS = 86400000000000LL @@ -5156,11 +5135,9 @@ def date_normalize(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - _TSObject tso ndarray[int64_t] result = np.empty(n, dtype=np.int64) if tz is not None: - tso = _TSObject() tz = maybe_get_tz(tz) result = _normalize_local(stamps, tz) else: @@ -5305,8 +5282,6 @@ def monthrange(int64_t year, int64_t month): return (dayofweek(year, month, 1), days) -cdef inline int64_t ts_dayofweek(_TSObject ts): - return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day) cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] From efa28f87c2bd960e61cc9ef1436137f9bf01977a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Aug 2017 15:12:40 -0700 Subject: [PATCH 2/5] Remove unused functions Only those that are cdef so not exposed. That way there is not a risk of backward incompatibility --- pandas/_libs/period.pyx | 21 --------------------- pandas/_libs/src/datetime.pxd | 32 -------------------------------- pandas/_libs/tslib.pyx | 4 ---- 3 files changed, 57 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e017d863e1907..4b7884e0ae9b6 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -120,27 +120,6 @@ initialize_daytime_conversion_factor_matrix() # Period logic #---------------------------------------------------------------------- -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. - For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal. - """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - - @cython.wraparound(False) @cython.boundscheck(False) def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): diff --git a/pandas/_libs/src/datetime.pxd b/pandas/_libs/src/datetime.pxd index 2267c8282ec14..23620e790c132 100644 --- a/pandas/_libs/src/datetime.pxd +++ b/pandas/_libs/src/datetime.pxd @@ -88,11 +88,6 @@ cdef extern from "datetime/np_datetime.h": int cmp_pandas_datetimestruct(pandas_datetimestruct *a, pandas_datetimestruct *b) - int convert_pydatetime_to_datetimestruct(PyObject *obj, - pandas_datetimestruct *out, - PANDAS_DATETIMEUNIT *out_bestunit, - int apply_tzinfo) - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) nogil void pandas_datetime_to_datetimestruct(npy_datetime val, @@ -112,12 +107,6 @@ cdef extern from "datetime/np_datetime_strings.h": PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) - int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, - int local, PANDAS_DATETIMEUNIT base, int tzoffset, - NPY_CASTING casting) - - int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 @@ -152,16 +141,6 @@ cdef inline int _cstring_to_dts(char *val, int length, return result -cdef inline object _datetime64_to_datetime(int64_t val): - cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - return _dts_to_pydatetime(&dts) - -cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): - return PyDateTime_FromDateAndTime(dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec, dts.us) - cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) @@ -173,17 +152,6 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) -cdef inline int64_t _dtlike_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = val.year - dts.month = val.month - dts.day = val.day - dts.hour = val.hour - dts.min = val.minute - dts.sec = val.second - dts.us = val.microsecond - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, pandas_datetimestruct *dts): diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a4c440afe1bac..a14bec50e10ac 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1008,10 +1008,6 @@ def unique_deltas(ndarray[int64_t] arr): return result -cdef inline bint _is_multiple(int64_t us, int64_t mult): - return us % mult == 0 - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs From 73e67ef7355a1d6b01a5f7189ee7a3b21d108113 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Aug 2017 15:27:18 -0700 Subject: [PATCH 3/5] Fix AttributeError mistake --- pandas/_libs/tslib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a14bec50e10ac..f6d0d7d44bac1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1180,8 +1180,8 @@ cdef class _Timestamp(datetime): warnings.warn("Discarding nonzero nanoseconds in conversion", UserWarning, stacklevel=2) return datetime(self.year, self.month, self.day, - self.hour, self.min, self.sec, - self.us, self.tzinfo) + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo) cpdef to_datetime64(self): """ Returns a numpy.datetime64 object with 'ns' precision """ From b6aaa9edb579bcf8657d5b329e6c75cfeeb82898 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 20 Aug 2017 20:47:27 -0700 Subject: [PATCH 4/5] Flake8 whitespace fixup --- pandas/_libs/period.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 4b7884e0ae9b6..6ba7ec0270f30 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -120,6 +120,7 @@ initialize_daytime_conversion_factor_matrix() # Period logic #---------------------------------------------------------------------- + @cython.wraparound(False) @cython.boundscheck(False) def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): From a99133c7dde47a008b8ed7357b4c8e6453d8682f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 21 Aug 2017 09:21:32 -0700 Subject: [PATCH 5/5] Revert changes to py_datetime which could be slower --- pandas/_libs/tslib.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index f6d0d7d44bac1..c4a38ec660a4c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1176,12 +1176,18 @@ cdef class _Timestamp(datetime): If warn=True, issue a warning if nanoseconds is nonzero. """ + cdef: + pandas_datetimestruct dts + _TSObject ts + if self.nanosecond != 0 and warn: warnings.warn("Discarding nonzero nanoseconds in conversion", UserWarning, stacklevel=2) - return datetime(self.year, self.month, self.day, - self.hour, self.minute, self.second, - self.microsecond, self.tzinfo) + ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) + dts = ts.dts + return datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us, ts.tzinfo) cpdef to_datetime64(self): """ Returns a numpy.datetime64 object with 'ns' precision """