From 1c4decd8429d98d753160a43d0f4cc5097b74b72 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Fri, 15 Feb 2019 18:43:29 -0500 Subject: [PATCH 01/33] interval constructor logic modified and error handling for is_dtype updated to include new TypeError --- pandas/core/dtypes/dtypes.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 640d43f3b0e03..af04fb3038c5a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -893,11 +893,15 @@ def __new__(cls, subtype=None): m = cls._match.search(subtype) if m is not None: subtype = m.group('subtype') - - try: - subtype = pandas_dtype(subtype) - except TypeError: - raise TypeError("could not construct IntervalDtype") + else: + # if no match found, a bad datatype was passed + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalDtype') + raise TypeError(msg) + try: + subtype = pandas_dtype(subtype) + except TypeError: + raise TypeError("could not construct IntervalDtype") if is_categorical_dtype(subtype) or is_string_dtype(subtype): # GH 19016 @@ -978,7 +982,7 @@ def is_dtype(cls, dtype): return True else: return False - except ValueError: + except (ValueError, TypeError): return False else: return False From d52c6317eadb0e30f7f85cdd339b76a9a3db0b20 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Fri, 15 Feb 2019 18:47:39 -0500 Subject: [PATCH 02/33] formatting updates for error msg --- pandas/core/dtypes/dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index af04fb3038c5a..cd96413a71500 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -895,8 +895,8 @@ def __new__(cls, subtype=None): subtype = m.group('subtype') else: # if no match found, a bad datatype was passed - msg = ('category, object, and string subtypes are not supported ' - 'for IntervalDtype') + msg = ('category, object, and string subtypes are not ' + 'supported for IntervalDtype') raise TypeError(msg) try: subtype = pandas_dtype(subtype) From 953823e7535a19eaa109f837e4754572f39873f2 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 13:45:11 -0500 Subject: [PATCH 03/33] subtype check logic implemented for Interval dtype --- pandas/core/dtypes/dtypes.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index cd96413a71500..07e014b14f7bc 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -893,20 +893,28 @@ def __new__(cls, subtype=None): m = cls._match.search(subtype) if m is not None: subtype = m.group('subtype') - else: - # if no match found, a bad datatype was passed - msg = ('category, object, and string subtypes are not ' - 'supported for IntervalDtype') - raise TypeError(msg) - try: - subtype = pandas_dtype(subtype) - except TypeError: - raise TypeError("could not construct IntervalDtype") - - if is_categorical_dtype(subtype) or is_string_dtype(subtype): - # GH 19016 + + # check subtype is numeric, datetime, or timedelta + valid_subtype = False + try: + np.issubdtype(subtype, np.number) + valid_subtype = True + except: + pass + try: + np.issubdtype(subdtype, np.datetime64) + valid_subtype = True + except: + pass + try: + np.issubdtype(subtype, np.timedelta64) + valid_subtype = True + except: + pass + + if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' - 'for IntervalDtype') + 'for IntervalDtype') raise TypeError(msg) try: From 8edbbd8dda40c850efc578afbf15532c37cd2d81 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 13:51:11 -0500 Subject: [PATCH 04/33] tests added to ensure pd.api.types.IntervalDtype.is_dtype(IntervalA) returns false --- pandas/tests/dtypes/test_dtypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 710f215686eab..6be06412e952b 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -559,6 +559,7 @@ def test_is_dtype(self): assert not IntervalDtype.is_dtype('U') assert not IntervalDtype.is_dtype('S') assert not IntervalDtype.is_dtype('foo') + assert not IntervalDtype.is_dtype('IntervalA') assert not IntervalDtype.is_dtype(np.object_) assert not IntervalDtype.is_dtype(np.int64) assert not IntervalDtype.is_dtype(np.float64) From 8e543084ef9b6c680e91e5ea9736581a3972ddda Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 13:53:34 -0500 Subject: [PATCH 05/33] tests added to ensure * pd.api.types.IntervalDtype(IntervalA) throws TypeError --- pandas/tests/dtypes/test_dtypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 6be06412e952b..7673b4d60ea44 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -503,7 +503,8 @@ def test_construction_generic(self, subtype): @pytest.mark.parametrize('subtype', [ CategoricalDtype(list('abc'), False), CategoricalDtype(list('wxyz'), True), - object, str, ' Date: Sun, 17 Feb 2019 13:59:10 -0500 Subject: [PATCH 06/33] test added to ensure pd.api.types.IntervalDtype.construct_from_string(IntervalA) throws TypeError --- pandas/tests/dtypes/test_dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 7673b4d60ea44..00911a34a1672 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -533,10 +533,11 @@ def test_construction_from_string_errors(self, string): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ - 'interval[foo]']) + 'interval[foo]', 'IntervalA']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype - msg = 'could not construct IntervalDtype' + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalDtype') with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) From 708a7403f2b473bfcdca41561fa09faa25e86f2d Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 14:02:08 -0500 Subject: [PATCH 07/33] PEP8 style issues resolved --- pandas/core/dtypes/dtypes.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 07e014b14f7bc..bd63c6e4bc4c6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -893,28 +893,28 @@ def __new__(cls, subtype=None): m = cls._match.search(subtype) if m is not None: subtype = m.group('subtype') - + # check subtype is numeric, datetime, or timedelta valid_subtype = False - try: + try: np.issubdtype(subtype, np.number) valid_subtype = True - except: + except TypeError: pass - try: + try: np.issubdtype(subdtype, np.datetime64) valid_subtype = True - except: + except TypeError: pass - try: + try: np.issubdtype(subtype, np.timedelta64) valid_subtype = True - except: + except TypeError: pass if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' - 'for IntervalDtype') + 'for IntervalDtype') raise TypeError(msg) try: From 9ae2afc99163430c44b7317b035d304a03f8ade4 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 14:03:37 -0500 Subject: [PATCH 08/33] more PEP8 style issues resolved --- pandas/core/dtypes/dtypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index bd63c6e4bc4c6..9f27ece7d352a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -902,7 +902,7 @@ def __new__(cls, subtype=None): except TypeError: pass try: - np.issubdtype(subdtype, np.datetime64) + np.issubdtype(subtype, np.datetime64) valid_subtype = True except TypeError: pass @@ -911,8 +911,8 @@ def __new__(cls, subtype=None): valid_subtype = True except TypeError: pass - - if not valid_subtype: + + if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' 'for IntervalDtype') raise TypeError(msg) From 0837c858d0758d657b0e5d96ef5d7c87ce3fcb5b Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 14:14:32 -0500 Subject: [PATCH 09/33] subtype checking consolidated into for loop --- pandas/core/dtypes/dtypes.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9f27ece7d352a..edb418b92e147 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -896,21 +896,13 @@ def __new__(cls, subtype=None): # check subtype is numeric, datetime, or timedelta valid_subtype = False - try: - np.issubdtype(subtype, np.number) - valid_subtype = True - except TypeError: - pass - try: - np.issubdtype(subtype, np.datetime64) - valid_subtype = True - except TypeError: - pass - try: - np.issubdtype(subtype, np.timedelta64) - valid_subtype = True - except TypeError: - pass + for nptype in [np.number, np.datetime64, np.timedelta64]: + try: + np.issubdtype(subtype, nptype) + valid_subtype = True + break + except TypeError: + pass if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' From c0818b66c0cdff101bc6536ab01158ebbe0883c5 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 15:06:32 -0500 Subject: [PATCH 10/33] logic change to include issubtype must result in True --- pandas/core/dtypes/dtypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index edb418b92e147..426e7af833003 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -898,9 +898,9 @@ def __new__(cls, subtype=None): valid_subtype = False for nptype in [np.number, np.datetime64, np.timedelta64]: try: - np.issubdtype(subtype, nptype) - valid_subtype = True - break + if np.issubdtype(subtype, nptype): + valid_subtype = True + break except TypeError: pass From dedba898886cd9f7e39b82eb9057d28a686031cb Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 16:36:22 -0500 Subject: [PATCH 11/33] logic change for checking datetime and timedelta dtypes --- pandas/core/dtypes/dtypes.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 426e7af833003..1cb42b66bb6c4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -6,7 +6,7 @@ import pytz from pandas._libs.interval import Interval -from pandas._libs.tslibs import NaT, Period, Timestamp, timezones +from pandas._libs.tslibs import NaT, Period, Timestamp, timezones, Timedelta from pandas.core.dtypes.generic import ( ABCCategoricalIndex, ABCDateOffset, ABCIndexClass) @@ -896,13 +896,26 @@ def __new__(cls, subtype=None): # check subtype is numeric, datetime, or timedelta valid_subtype = False - for nptype in [np.number, np.datetime64, np.timedelta64]: - try: - if np.issubdtype(subtype, nptype): - valid_subtype = True - break - except TypeError: - pass + + # test for number + try: + if np.issubdtype(subtype, np.number): + valid_subtype = True + except TypeError: + pass + # test for datetime + try: + DatetimeTZDtype.construct_from_string(subtype) + valid_subtype = True + except TypeError: + pass + # test for timedelta + try: + Timedelta(subtype) + valid_subtype = True + except ValueError: + pass + if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' From 6914a7b192aafbd9ff75273ebd3f9cb677ed1a70 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 16:37:24 -0500 Subject: [PATCH 12/33] extra blank line removed --- pandas/core/dtypes/dtypes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 1cb42b66bb6c4..82722190830f8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -916,7 +916,6 @@ def __new__(cls, subtype=None): except ValueError: pass - if not valid_subtype: msg = ('category, object, and string subtypes are not supported ' 'for IntervalDtype') From c53d69cf8a11f2b0616acf42b0f51c2c7b9e8e15 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Sun, 17 Feb 2019 17:42:48 -0500 Subject: [PATCH 13/33] reset to original logic on working branch, only tests written remain committed to branch --- pandas/core/dtypes/dtypes.py | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 82722190830f8..640d43f3b0e03 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -6,7 +6,7 @@ import pytz from pandas._libs.interval import Interval -from pandas._libs.tslibs import NaT, Period, Timestamp, timezones, Timedelta +from pandas._libs.tslibs import NaT, Period, Timestamp, timezones from pandas.core.dtypes.generic import ( ABCCategoricalIndex, ABCDateOffset, ABCIndexClass) @@ -894,29 +894,13 @@ def __new__(cls, subtype=None): if m is not None: subtype = m.group('subtype') - # check subtype is numeric, datetime, or timedelta - valid_subtype = False - - # test for number - try: - if np.issubdtype(subtype, np.number): - valid_subtype = True - except TypeError: - pass - # test for datetime - try: - DatetimeTZDtype.construct_from_string(subtype) - valid_subtype = True - except TypeError: - pass - # test for timedelta - try: - Timedelta(subtype) - valid_subtype = True - except ValueError: - pass + try: + subtype = pandas_dtype(subtype) + except TypeError: + raise TypeError("could not construct IntervalDtype") - if not valid_subtype: + if is_categorical_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 msg = ('category, object, and string subtypes are not supported ' 'for IntervalDtype') raise TypeError(msg) @@ -994,7 +978,7 @@ def is_dtype(cls, dtype): return True else: return False - except (ValueError, TypeError): + except ValueError: return False else: return False From f2d2390f4817bda01ac5884ba74595f8c907c37e Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 18:47:36 -0500 Subject: [PATCH 14/33] logic for string constructor and error message content updated --- pandas/core/dtypes/dtypes.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 640d43f3b0e03..18614e8dde82c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -931,10 +931,15 @@ def construct_from_string(cls, string): attempt to construct this type from a string, raise a TypeError if its not possible """ - if (isinstance(string, compat.string_types) and - (string.startswith('interval') or - string.startswith('Interval'))): - return cls(string) + if isinstance(string, compat.string_types): + if (string.lower() == 'interval' or + cls._match.search(string) is not None): + return cls(string) + else: + msg = ("Incorrectly formatted string passed to constructor. " + "Valid formats include 'Interval' or 'Inverval[dtype]' " + "where dtype is numeric, datetime, or timedelta") + raise TypeError(msg) msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) From 08ea833fd9bd75acdb376a6ab70c4e9c2e7890da Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 18:52:32 -0500 Subject: [PATCH 15/33] construction error for IntervalA parameter moved to proper test --- pandas/tests/dtypes/test_dtypes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 00911a34a1672..43be93900d1df 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -503,8 +503,7 @@ def test_construction_generic(self, subtype): @pytest.mark.parametrize('subtype', [ CategoricalDtype(list('abc'), False), CategoricalDtype(list('wxyz'), True), - object, str, ' Date: Mon, 18 Feb 2019 18:57:11 -0500 Subject: [PATCH 16/33] IntervalDtype construct_from_string tests updated to separate a bad datatype (not a string) being passed, from a bad string being passed --- pandas/tests/dtypes/test_dtypes.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 43be93900d1df..b701d48152009 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -524,7 +524,7 @@ def test_construction_from_string(self): assert is_dtype_equal(self.dtype, result) @pytest.mark.parametrize('string', [ - 'foo', 'foo[int64]', 0, 3.14, ('a', 'b'), None]) + 0, 3.14, ('a', 'b'), None]) def test_construction_from_string_errors(self, string): # these are invalid entirely msg = 'a string needs to be passed, got type' @@ -533,11 +533,12 @@ def test_construction_from_string_errors(self, string): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ - 'interval[foo]', 'IntervalA']) + 'interval[foo]', 'IntervalA', 'foo', 'foo[int64]']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype - msg = ('category, object, and string subtypes are not supported ' - 'for IntervalDtype') + msg = ("Incorrectly formatted string passed to constructor. " + "Valid formats include 'Interval' or 'Inverval[dtype]' " + "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) From f2a3bdba3304e7f249afde712b3e38805572a3f4 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 18:59:09 -0500 Subject: [PATCH 17/33] TypeError added to is_dtype exceptions, reflecting changes in the construct_from_string method --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 18614e8dde82c..ce19196c1657d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -983,7 +983,7 @@ def is_dtype(cls, dtype): return True else: return False - except ValueError: + except (ValueError, TypeError): return False else: return False From 0011a8093e62878b90e85a548c219800cbf47ea6 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 19:05:28 -0500 Subject: [PATCH 18/33] formatting fixes and test update --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ce19196c1657d..6e9b78e21aeea 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -932,7 +932,7 @@ def construct_from_string(cls, string): if its not possible """ if isinstance(string, compat.string_types): - if (string.lower() == 'interval' or + if (string.lower() == 'interval' or cls._match.search(string) is not None): return cls(string) else: diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index b701d48152009..5c2dbda7058ea 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -512,7 +512,7 @@ def test_construction_not_supported(self, subtype): IntervalDtype(subtype) @pytest.mark.parametrize('subtype', ['xx', 'IntervalA']) - def test_construction_errors(self): + def test_construction_errors(self, subtype): msg = 'could not construct IntervalDtype' with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) From aa581fa4996a5c7aab0ba9459ea0b285f139be11 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 19:06:39 -0500 Subject: [PATCH 19/33] formatting update --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 6e9b78e21aeea..70d1d93e12191 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -934,7 +934,7 @@ def construct_from_string(cls, string): if isinstance(string, compat.string_types): if (string.lower() == 'interval' or cls._match.search(string) is not None): - return cls(string) + return cls(string) else: msg = ("Incorrectly formatted string passed to constructor. " "Valid formats include 'Interval' or 'Inverval[dtype]' " diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 5c2dbda7058ea..a6d383d4f67c1 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -537,8 +537,8 @@ def test_construction_from_string_errors(self, string): def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " - "Valid formats include 'Interval' or 'Inverval[dtype]' " - "where dtype is numeric, datetime, or timedelta") + "Valid formats include 'Interval' or 'Inverval[dtype]' " + "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) From 54bcf76e5922fe19c27dbb743161b80e56d233ef Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 20:12:45 -0500 Subject: [PATCH 20/33] construct from string formatting fixes --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 70d1d93e12191..abf98b1740c34 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -933,7 +933,7 @@ def construct_from_string(cls, string): """ if isinstance(string, compat.string_types): if (string.lower() == 'interval' or - cls._match.search(string) is not None): + cls._match.search(string) is not None): return cls(string) else: msg = ("Incorrectly formatted string passed to constructor. " From 36f8571bfea9f4f2dc1d9aa01ce68d631552e867 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 20:16:43 -0500 Subject: [PATCH 21/33] interval[foo] construct from string error moved to proper test category --- pandas/tests/dtypes/test_dtypes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index a6d383d4f67c1..0ac077ce49ccd 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -511,7 +511,7 @@ def test_construction_not_supported(self, subtype): with pytest.raises(TypeError, match=msg): IntervalDtype(subtype) - @pytest.mark.parametrize('subtype', ['xx', 'IntervalA']) + @pytest.mark.parametrize('subtype', ['xx', 'IntervalA', 'Interval[foo]']) def test_construction_errors(self, subtype): msg = 'could not construct IntervalDtype' with pytest.raises(TypeError, match=msg): @@ -533,7 +533,7 @@ def test_construction_from_string_errors(self, string): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ - 'interval[foo]', 'IntervalA', 'foo', 'foo[int64]']) + 'IntervalA', 'foo', 'foo[int64]']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " From 6e6fe2366128cbb7ba67974dd31080ca00a93d0f Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 20:17:59 -0500 Subject: [PATCH 22/33] IntervalA construct from string error moved to proper test category --- pandas/tests/dtypes/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 0ac077ce49ccd..3b56b1c677e3d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -533,7 +533,7 @@ def test_construction_from_string_errors(self, string): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ - 'IntervalA', 'foo', 'foo[int64]']) + 'foo', 'foo[int64]']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " From 5547b5fa0c68a71c213003ed84e17cfaf0481ab2 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Mon, 18 Feb 2019 21:06:53 -0500 Subject: [PATCH 23/33] IntervalDtype construct from string test error message handling updated, regex match in error message string was causing test to fail --- pandas/core/dtypes/dtypes.py | 6 +++--- pandas/tests/dtypes/test_dtypes.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index abf98b1740c34..e78a543d7faa7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -936,9 +936,9 @@ def construct_from_string(cls, string): cls._match.search(string) is not None): return cls(string) else: - msg = ("Incorrectly formatted string passed to constructor. " - "Valid formats include 'Interval' or 'Inverval[dtype]' " - "where dtype is numeric, datetime, or timedelta") + msg = ('Incorrectly formatted string passed to constructor. ' + 'Valid formats include Interval or Inverval[dtype] ' + 'where dtype is numeric, datetime, or timedelta') raise TypeError(msg) msg = "a string needs to be passed, got type {typ}" diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 3b56b1c677e3d..826f7ae26a797 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -536,12 +536,13 @@ def test_construction_from_string_errors(self, string): 'foo', 'foo[int64]']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype - msg = ("Incorrectly formatted string passed to constructor. " - "Valid formats include 'Interval' or 'Inverval[dtype]' " - "where dtype is numeric, datetime, or timedelta") + msg = ('Incorrectly formatted string passed to constructor. ' + 'Valid formats include Interval or Inverval[dtype] ' + 'where dtype is numeric, datetime, or timedelta') - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError) as e: IntervalDtype.construct_from_string(string) + assert e == msg def test_subclass(self): a = IntervalDtype('interval[int64]') From aedc7cca6e71fc8bbf4d99249ee46027c3654934 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 08:17:56 -0500 Subject: [PATCH 24/33] re-ordering of logic in construct from string method for cleaner code --- pandas/core/dtypes/dtypes.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e78a543d7faa7..3c64b5e0e2119 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -931,18 +931,18 @@ def construct_from_string(cls, string): attempt to construct this type from a string, raise a TypeError if its not possible """ - if isinstance(string, compat.string_types): - if (string.lower() == 'interval' or - cls._match.search(string) is not None): - return cls(string) - else: - msg = ('Incorrectly formatted string passed to constructor. ' - 'Valid formats include Interval or Inverval[dtype] ' - 'where dtype is numeric, datetime, or timedelta') - raise TypeError(msg) - - msg = "a string needs to be passed, got type {typ}" - raise TypeError(msg.format(typ=type(string))) + if not isinstance(string, compat.string_types): + msg = "a string needs to be passed, got type {typ}" + raise TypeError(msg.format(typ=type(string))) + + if (string.lower() == 'interval' or + cls._match.search(string) is not None): + return cls(string) + + msg = ('Incorrectly formatted string passed to constructor. ' + 'Valid formats include Interval or Inverval[dtype] ' + 'where dtype is numeric, datetime, or timedelta') + raise TypeError(msg) @property def type(self): From 2deba07d6ac0c1088275db1ac711ee19d0bd6dfb Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 09:18:27 -0500 Subject: [PATCH 25/33] message string altered to work with regex matching --- pandas/tests/dtypes/test_dtypes.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 826f7ae26a797..ff7c38cbce5d1 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -536,13 +536,12 @@ def test_construction_from_string_errors(self, string): 'foo', 'foo[int64]']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype - msg = ('Incorrectly formatted string passed to constructor. ' - 'Valid formats include Interval or Inverval[dtype] ' - 'where dtype is numeric, datetime, or timedelta') + msg = (r"""Incorrectly formatted string passed to constructor. + Valid formats include Interval or Inverval\[ dtype\] + where dtype is numeric, datetime, or timedelta""") - with pytest.raises(TypeError) as e: + with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) - assert e == msg def test_subclass(self): a = IntervalDtype('interval[int64]') From c107ccdf5becb26e8131f3ce0c46bcf9a3965db9 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 09:31:29 -0500 Subject: [PATCH 26/33] IntervalA test re-added to construct from string testing --- pandas/tests/dtypes/test_dtypes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index ff7c38cbce5d1..78753ce45f607 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -533,12 +533,12 @@ def test_construction_from_string_errors(self, string): IntervalDtype.construct_from_string(string) @pytest.mark.parametrize('string', [ - 'foo', 'foo[int64]']) + 'foo', 'foo[int64]', 'IntervalA']) def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype - msg = (r"""Incorrectly formatted string passed to constructor. - Valid formats include Interval or Inverval\[ dtype\] - where dtype is numeric, datetime, or timedelta""") + msg = ("Incorrectly formatted string passed to constructor. " + "Valid formats include Interval or Inverval\[dtype\] " + "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): IntervalDtype.construct_from_string(string) From 989942e3148bf9963091ef0153e6be1dcb8f5559 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 11:04:33 -0500 Subject: [PATCH 27/33] error message formatting fix for PEP8 --- pandas/tests/dtypes/test_dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 78753ce45f607..22a831687be53 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -537,7 +537,7 @@ def test_construction_from_string_errors(self, string): def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " - "Valid formats include Interval or Inverval\[dtype\] " + r"Valid formats include Interval or Inverval\[dtype\] " "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): From d28a0fd760de6064be9d9133cdfcaf4ae3551e38 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 14:16:21 -0500 Subject: [PATCH 28/33] test added for comparing series with strings containing Interval keyword --- pandas/tests/series/test_operators.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4d3c9926fc5ae..f8ec095c84b63 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -563,6 +563,10 @@ def test_comp_ops_df_compat(self): with pytest.raises(ValueError, match=msg): left.to_frame() < right.to_frame() + def test_compare_series_interval_keyword(self): + s = Series(['IntervalA', 'IntervalB', 'IntervalC']) == 'IntervalA' + assert_series_equal(s, Series([True, False, False])) + class TestSeriesFlexComparisonOps(object): From e72bfb92a32d0508c87b46f5f7da56f157af4ff5 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 14:20:14 -0500 Subject: [PATCH 29/33] regression fix documented in whats new for v0.24.2 --- doc/source/whatsnew/v0.24.2.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 8e59c2300e7ca..45db1e1d5ada0 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -27,6 +27,8 @@ Fixed Regressions - Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`) - Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ```Categorical`` data (:issue:`25299`) +- Fixed regression in ``IntervalDtype`` construction passing in an approximately correct string could result in RecursionError. (:issue:`25338`) + .. _whatsnew_0242.enhancements: Enhancements From dfe6e99cafabf18bbba72c6589675ccf46fd15e2 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 15:51:46 -0500 Subject: [PATCH 30/33] typo fix in construct_from_string error message --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3c64b5e0e2119..b5b762c5aecf4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -940,7 +940,7 @@ def construct_from_string(cls, string): return cls(string) msg = ('Incorrectly formatted string passed to constructor. ' - 'Valid formats include Interval or Inverval[dtype] ' + 'Valid formats include Interval or Intverval[dtype] ' 'where dtype is numeric, datetime, or timedelta') raise TypeError(msg) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 22a831687be53..c612c5caa984a 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -537,7 +537,7 @@ def test_construction_from_string_errors(self, string): def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " - r"Valid formats include Interval or Inverval\[dtype\] " + r"Valid formats include Interval or Intverval\[dtype\] " "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): From f08915f7963dc149db625b1989a130f483d11f28 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 15:53:06 -0500 Subject: [PATCH 31/33] typo fix in construct_from_string error message --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index b5b762c5aecf4..11a132c4d14ee 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -940,7 +940,7 @@ def construct_from_string(cls, string): return cls(string) msg = ('Incorrectly formatted string passed to constructor. ' - 'Valid formats include Interval or Intverval[dtype] ' + 'Valid formats include Interval or Interval[dtype] ' 'where dtype is numeric, datetime, or timedelta') raise TypeError(msg) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c612c5caa984a..1c1442d6f2f23 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -537,7 +537,7 @@ def test_construction_from_string_errors(self, string): def test_construction_from_string_error_subtype(self, string): # this is an invalid subtype msg = ("Incorrectly formatted string passed to constructor. " - r"Valid formats include Interval or Intverval\[dtype\] " + r"Valid formats include Interval or Interval\[dtype\] " "where dtype is numeric, datetime, or timedelta") with pytest.raises(TypeError, match=msg): From f1fabdfd56aeef22e95fe1accad76a90edcbb614 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 21:43:30 -0500 Subject: [PATCH 32/33] whatsnew note adjusted for clarity and style --- doc/source/whatsnew/v0.24.2.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 45db1e1d5ada0..f528c058d2868 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -26,8 +26,7 @@ Fixed Regressions - Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`) - Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ```Categorical`` data (:issue:`25299`) - -- Fixed regression in ``IntervalDtype`` construction passing in an approximately correct string could result in RecursionError. (:issue:`25338`) +- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) .. _whatsnew_0242.enhancements: From 1ed7f8cc474923296d545fda94e61c8226d3fac9 Mon Sep 17 00:00:00 2001 From: zangell44 <42625717+zangell44@users.noreply.github.com> Date: Tue, 19 Feb 2019 21:46:24 -0500 Subject: [PATCH 33/33] formatting update for test_compare_series_interval_keyword --- pandas/tests/series/test_operators.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index f8ec095c84b63..b2aac441db195 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -564,8 +564,11 @@ def test_comp_ops_df_compat(self): left.to_frame() < right.to_frame() def test_compare_series_interval_keyword(self): - s = Series(['IntervalA', 'IntervalB', 'IntervalC']) == 'IntervalA' - assert_series_equal(s, Series([True, False, False])) + # GH 25338 + s = Series(['IntervalA', 'IntervalB', 'IntervalC']) + result = s == 'IntervalA' + expected = Series([True, False, False]) + assert_series_equal(result, expected) class TestSeriesFlexComparisonOps(object):