Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST/CLN: empty DataFrames and some 'empty' Series #25690

Merged
merged 3 commits into from
Mar 27, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ def test_arith_flex_zero_len_raises(self):
# GH 19522 passing fill_value to frame flex arith methods should
# raise even in the zero-length special cases
ser_len0 = pd.Series([])
df_len0 = pd.DataFrame([], columns=['A', 'B'])
df_len0 = pd.DataFrame(columns=['A', 'B'])
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])

with pytest.raises(NotImplementedError, match='fill_value'):
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/frame/test_combine_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ def test_append_list_of_series_dicts(self):
def test_append_empty_dataframe(self):

# Empty df append empty df
df1 = DataFrame([])
df2 = DataFrame([])
df1 = DataFrame()
df2 = DataFrame()
result = df1.append(df2)
expected = df1.copy()
assert_frame_equal(result, expected)
Expand Down Expand Up @@ -576,18 +576,18 @@ def test_combine_first(self, float_frame):
assert_series_equal(combined['A'].reindex(g.index), g['A'])

# corner cases
comb = float_frame.combine_first(DataFrame({}))
comb = float_frame.combine_first(DataFrame())
assert_frame_equal(comb, float_frame)

comb = DataFrame({}).combine_first(float_frame)
comb = DataFrame().combine_first(float_frame)
assert_frame_equal(comb, float_frame)

comb = float_frame.combine_first(DataFrame(index=["faz", "boo"]))
assert "faz" in comb.index

# #2525
df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)])
df2 = DataFrame({}, columns=['b'])
df2 = DataFrame(columns=['b'])
result = df.combine_first(df2)
assert 'b' in result

Expand Down
39 changes: 30 additions & 9 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,33 @@

class TestDataFrameConstructors(TestData):

def test_constructor(self):
df = DataFrame()
assert len(df.index) == 0

df = DataFrame(data={})
assert len(df.index) == 0
@pytest.mark.parametrize('constructor', [
lambda: DataFrame(),
lambda: DataFrame(None),
lambda: DataFrame({}),
lambda: DataFrame(()),
lambda: DataFrame([]),
lambda: DataFrame((x for x in [])),
lambda: DataFrame(data=None),
lambda: DataFrame(data={}),
lambda: DataFrame(data=()),
lambda: DataFrame(data=[]),
lambda: DataFrame(data=(x for x in [])),
# these are NOT empty DataFrames
pytest.param(lambda: DataFrame([[]]), marks=pytest.mark.xfail(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to parametrize these cases in a separate test rather than supplying the xfail mark here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was just cautious about adding tests for the current behaviour, because tested behaviour tends to become the accepted behaviour simply because it is tested. I'm not sure whether the index differences are intentional.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree. Can you separate these cases out?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you update this and merge master? Ping on green.

reason='creates a non-zero length RangeIndex')),
pytest.param(lambda: DataFrame([[], []]), marks=pytest.mark.xfail(
reason='creates a non-zero length RangeIndex')),
pytest.param(lambda: DataFrame([(x for x in [])]),
marks=pytest.mark.xfail(
reason='creates a non-zero length RangeIndex'))
])
def test_empty_constructor(self, constructor):
expected = DataFrame()
result = constructor()
assert len(result.index) == 0
assert len(result.columns) == 0
tm.assert_frame_equal(result, expected)

def test_constructor_mixed(self):
index, data = tm.getMixedTypeDict()
Expand Down Expand Up @@ -95,7 +116,7 @@ def test_constructor_dtype_list_data(self):

def test_constructor_list_frames(self):
# see gh-3243
result = DataFrame([DataFrame([])])
result = DataFrame([DataFrame()])
assert result.shape == (1, 0)

result = DataFrame([DataFrame(dict(A=lrange(5)))])
Expand Down Expand Up @@ -265,7 +286,7 @@ def test_constructor_dict(self):
frame = DataFrame({}, index=idx)
assert frame.index is idx

# empty with index and columns
# empty dict with index and columns
idx = Index([0, 1, 2])
frame = DataFrame({}, index=idx, columns=idx)
assert frame.index is idx
Expand Down Expand Up @@ -1122,7 +1143,7 @@ def test_constructor_list_of_series(self):
result2 = DataFrame(data, index=np.arange(6))
tm.assert_frame_equal(result, result2)

result = DataFrame([Series({})])
result = DataFrame([Series()])
expected = DataFrame(index=[0])
tm.assert_frame_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_pivot_duplicates(self):
data.pivot('a', 'b', 'c')

def test_pivot_empty(self):
df = DataFrame({}, columns=['a', 'b', 'c'])
df = DataFrame(columns=['a', 'b', 'c'])
result = df.pivot('a', 'b', 'c')
expected = DataFrame()
tm.assert_frame_equal(result, expected, check_names=False)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,8 +1064,8 @@ def test_size(df):
tm.assert_series_equal(left, right, check_names=False)

# GH11699
df = DataFrame([], columns=['A', 'B'])
out = Series([], dtype='int64', index=Index([], name='A'))
df = DataFrame(columns=['A', 'B'])
out = Series(dtype='int64', index=Index([], name='A'))
tm.assert_series_equal(df.groupby('A').size(), out)


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def f1(x):
if y.empty:
multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2,
names=['b', 'c'])
res = DataFrame(None, columns=['a'], index=multiindex)
res = DataFrame(columns=['a'], index=multiindex)
return res
else:
y = y.set_index(['b', 'c'])
Expand All @@ -317,7 +317,7 @@ def f3(x):
if y.empty:
multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2,
names=['foo', 'bar'])
res = DataFrame(None, columns=['a', 'b'], index=multiindex)
res = DataFrame(columns=['a', 'b'], index=multiindex)
return res
else:
return y
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def test_groupby_with_single_column(self):
df = pd.DataFrame({'a': list('abssbab')})
tm.assert_frame_equal(df.groupby('a').get_group('a'), df.iloc[[0, 5]])
# GH 13530
exp = pd.DataFrame([], index=pd.Index(['a', 'b', 's'], name='a'))
exp = pd.DataFrame(index=pd.Index(['a', 'b', 's'], name='a'))
tm.assert_frame_equal(df.groupby('a').count(), exp)
tm.assert_frame_equal(df.groupby('a').sum(), exp)
tm.assert_frame_equal(df.groupby('a').nth(1), exp)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def setup_method(self, method):
self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
index=dates_rev)

self.frame_empty = DataFrame({})
self.series_empty = Series({})
self.frame_empty = DataFrame()
self.series_empty = Series()

# form agglomerates
for o in self._objs:
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def test_set_names_unset(self, idx, nm, prop):
])
def test_warns_non_roundtrippable_names(self, idx):
# GH 19130
df = pd.DataFrame([[]], index=idx)
df = pd.DataFrame(index=idx)
df.index.name = 'index'
with tm.assert_produces_warning():
set_default_names(df)
Expand Down Expand Up @@ -566,7 +566,7 @@ def test_multiindex(self, index_names):

def test_empty_frame_roundtrip(self):
# GH 21287
df = pd.DataFrame([], columns=['a', 'b', 'c'])
df = pd.DataFrame(columns=['a', 'b', 'c'])
expected = df.copy()
out = df.to_json(orient='table')
result = pd.read_json(out, orient='table')
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def setup(self, datapath):
self.objSeries.name = 'objects'

self.empty_series = Series([], index=[])
self.empty_frame = DataFrame({})
self.empty_frame = DataFrame()

self.frame = _frame.copy()
self.frame2 = _frame2.copy()
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,7 @@ def test_empty_with_index(all_parsers):
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=0)

expected = DataFrame([], columns=["y"], index=Index([], name="x"))
expected = DataFrame(columns=["y"], index=Index([], name="x"))
tm.assert_frame_equal(result, expected)


Expand All @@ -1161,7 +1161,7 @@ def test_empty_with_multi_index(all_parsers):
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=["x", "y"])

expected = DataFrame([], columns=["z"],
expected = DataFrame(columns=["z"],
index=MultiIndex.from_arrays(
[[]] * 2, names=["x", "y"]))
tm.assert_frame_equal(result, expected)
Expand All @@ -1172,7 +1172,7 @@ def test_empty_with_reversed_multi_index(all_parsers):
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=[1, 0])

expected = DataFrame([], columns=["z"],
expected = DataFrame(columns=["z"],
index=MultiIndex.from_arrays(
[[]] * 2, names=["y", "x"]))
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -1284,7 +1284,7 @@ def test_numeric_range_too_wide(all_parsers, exp_data):
def test_empty_with_nrows_chunksize(all_parsers, iterator):
# see gh-9535
parser = all_parsers
expected = DataFrame([], columns=["foo", "bar"])
expected = DataFrame(columns=["foo", "bar"])

nrows = 10
data = StringIO("foo,bar\n")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/test_index_col.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_index_col_empty_data(all_parsers, index_col, kwargs):
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=index_col)

expected = DataFrame([], **kwargs)
expected = DataFrame(**kwargs)
tm.assert_frame_equal(result, expected)


Expand All @@ -115,7 +115,7 @@ def test_empty_with_index_col_false(all_parsers):
parser = all_parsers
result = parser.read_csv(StringIO(data), index_col=False)

expected = DataFrame([], columns=["x", "y"])
expected = DataFrame(columns=["x", "y"])
tm.assert_frame_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_period_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ def test_resample_with_only_nat(self):
pi = PeriodIndex([pd.NaT] * 3, freq='S')
frame = DataFrame([2, 3, 5], index=pi)
expected_index = PeriodIndex(data=[], freq=pi.freq)
expected = DataFrame([], index=expected_index)
expected = DataFrame(index=expected_index)
result = frame.resample('1s').mean()
assert_frame_equal(result, expected)

Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,10 @@ def test_left_merge_empty_dataframe(self):
dict(left_on='a', right_on='x')])
def test_merge_left_empty_right_empty(self, join_type, kwarg):
# GH 10824
left = pd.DataFrame([], columns=['a', 'b', 'c'])
right = pd.DataFrame([], columns=['x', 'y', 'z'])
left = pd.DataFrame(columns=['a', 'b', 'c'])
right = pd.DataFrame(columns=['x', 'y', 'z'])

exp_in = pd.DataFrame([], columns=['a', 'b', 'c', 'x', 'y', 'z'],
exp_in = pd.DataFrame(columns=['a', 'b', 'c', 'x', 'y', 'z'],
index=pd.Index([], dtype=object),
dtype=object)

Expand All @@ -398,7 +398,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg):

def test_merge_left_empty_right_notempty(self):
# GH 10824
left = pd.DataFrame([], columns=['a', 'b', 'c'])
left = pd.DataFrame(columns=['a', 'b', 'c'])
right = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=['x', 'y', 'z'])

Expand Down Expand Up @@ -444,7 +444,7 @@ def test_merge_left_notempty_right_empty(self):
# GH 10824
left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
columns=['a', 'b', 'c'])
right = pd.DataFrame([], columns=['x', 'y', 'z'])
right = pd.DataFrame(columns=['x', 'y', 'z'])

exp_out = pd.DataFrame({'a': [1, 4, 7],
'b': [2, 5, 8],
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ def test_append(self, sort):
mixed_appended2.reindex(columns=['A', 'B', 'C', 'D']))

# append empty
empty = DataFrame({})
empty = DataFrame()

appended = self.frame.append(empty)
tm.assert_frame_equal(self.frame, appended)
Expand Down Expand Up @@ -868,7 +868,7 @@ def test_append_many(self, sort):

def test_append_preserve_index_name(self):
# #980
df1 = DataFrame(data=None, columns=['A', 'B', 'C'])
df1 = DataFrame(columns=['A', 'B', 'C'])
df1 = df1.set_index(['A'])
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]],
columns=['A', 'B', 'C'])
Expand Down
26 changes: 25 additions & 1 deletion pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@

class TestSeriesConstructors():

@pytest.mark.parametrize('constructor,check_index_type', [
# NOTE: some overlap with test_constructor_empty but that test does not
WillAyd marked this conversation as resolved.
Show resolved Hide resolved
# test for None or an empty generator.
# test_constructor_pass_none tests None but only with the index also
# passed.
(lambda: Series(), True),
(lambda: Series(None), True),
(lambda: Series({}), True),
(lambda: Series(()), False), # creates a RangeIndex
(lambda: Series([]), False), # creates a RangeIndex
(lambda: Series((x for x in [])), False), # creates a RangeIndex
(lambda: Series(data=None), True),
(lambda: Series(data={}), True),
(lambda: Series(data=()), False), # creates a RangeIndex
(lambda: Series(data=[]), False), # creates a RangeIndex
(lambda: Series(data=(x for x in [])), False), # creates a RangeIndex
])
def test_empty_constructor(self, constructor, check_index_type):
expected = Series()
result = constructor()
assert len(result.index) == 0
tm.assert_series_equal(result, expected,
check_index_type=check_index_type)

def test_invalid_dtype(self):
# GH15520
msg = 'not understood'
Expand Down Expand Up @@ -66,7 +90,7 @@ def test_constructor(self, datetime_series):
assert mixed[1] is np.NaN

assert not empty_series.index.is_all_dates
assert not Series({}).index.is_all_dates
assert not Series().index.is_all_dates

# exception raised is of type Exception
with pytest.raises(Exception, match="Data must be 1-dimensional"):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def test_count_level_corner(self):

df = self.frame[:0]
result = df.count(level=0)
expected = DataFrame({}, index=s.index.levels[0],
expected = DataFrame(index=s.index.levels[0],
columns=df.columns).fillna(0).astype(np.int64)
tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,7 @@ def test_empty_str_methods(self):

def test_empty_str_methods_to_frame(self):
empty = Series(dtype=str)
empty_df = DataFrame([])
empty_df = DataFrame()
tm.assert_frame_equal(empty_df, empty.str.partition('a'))
tm.assert_frame_equal(empty_df, empty.str.rpartition('a'))

Expand Down Expand Up @@ -2551,7 +2551,7 @@ def test_split_blank_string(self):
# expand blank split GH 20067
values = Series([''], name='test')
result = values.str.split(expand=True)
exp = DataFrame([[]])
exp = DataFrame([[]]) # NOTE: this is NOT an empty DataFrame
tm.assert_frame_equal(result, exp)

values = Series(['a b c', 'a b', '', ' '], name='test')
Expand Down