pandas-dev · h-vetinari · Jan 10, 2019 · Jan 10, 2019 · Jan 10, 2019 · Jan 10, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -93,6 +93,34 @@ Other API Changes
 Deprecations
 ~~~~~~~~~~~~
 
+**Lists as arrays in :meth:`DataFrame.set_index`**
+
+Currently, :meth:`DataFrame.set_index` accepts lists as meaning two different things - as a list of labels, and as an array-like collection of values.
+This ambiguity is resolved in favor of the list of labels, but nested lists are interpreted as arrays:
+
+.. ipython:: python
+    :okwarning:
+
+    df = pd.DataFrame(np.reshape(np.arange(12), (3, 4)),
+                      columns=['a', 'b', 'c', 'd'])
+    df.set_index(['a', 'b', 'c'])
+    df.set_index([['a', 'b', 'c']])
+
+The latter case has now been deprecated and will be removed in a future version. As a replacement,
+it is suggested to wrap the list in a :class:`Series`, :class:`Index`, ``np.array`` or an iterator.
+
+.. ipython:: python
+
+    df.set_index(pd.Series(['a', 'b', 'c']))
+
+It remains possible to use a list to collect several column keys or arrays, which creates multiple levels of a :class:`MultiIndex`.
+
+.. ipython:: python
+
+    df.set_index(['a', pd.Series(['a', 'b', 'c'])])
+
+**Other deprecations**
+
 - Deprecated the `M (months)` and `Y (year)` `units` parameter of :func: `pandas.to_timedelta`, :func: `pandas.Timedelta` and :func: `pandas.TimedeltaIndex` (:issue:`16344`)
 
 .. _whatsnew_0250.prior_deprecations:

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4033,6 +4033,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             arbitrary combination of column keys and arrays. Here, "array"
             encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and
             instances of :class:`abc.Iterator`.
+            Lists (in the sense of a sequence of values, not column labels)
+            have been deprecated, and will be removed in a future version.
         drop : bool, default True
             Delete columns to be used as the new index.
         append : bool, default False
@@ -4116,13 +4118,16 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                    'one-dimensional arrays.')
 
         missing = []
+        depr_warn = False
         for col in keys:
             if isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray,
-                                list, Iterator)):
+                                Iterator)):
                 # arrays are fine as long as they are one-dimensional
                 # iterators get converted to list below
                 if getattr(col, 'ndim', 1) != 1:
                     raise ValueError(err_msg)
+            elif isinstance(col, list):
+                depr_warn = True
             else:
                 # everything else gets tried as a key; see GH 24969
                 try:
@@ -4136,6 +4141,13 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
 
         if missing:
             raise KeyError('None of {} are in the columns'.format(missing))
+        if depr_warn:
+            msg = ('Passing lists within a list to the parameter "keys" is '
+                   'deprecated and will be removed in a future version. To '
+                   'silence this warning, wrap the lists in a Series / Index '
+                   'or np.ndarray. E.g. df.set_index(["A", [1, 2, 3]]) should '
+                   'be passed as df.set_index(["A", pd.Series([1, 2, 3])]).')
+            warnings.warn(msg, FutureWarning, stacklevel=2)
 
         if inplace:
             frame = self

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -115,10 +115,8 @@ def test_set_index_after_mutation(self):
         tm.assert_frame_equal(result, expected)
 
     # MultiIndex constructor does not work directly on Series -> lambda
-    # Add list-of-list constructor because list is ambiguous -> lambda
     # also test index name if append=True (name is duplicate here for B)
-    @pytest.mark.parametrize('box', [Series, Index, np.array,
-                                     list, lambda x: [list(x)],
+    @pytest.mark.parametrize('box', [Series, Index, np.array, list,
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name', [(True, None),
                              (True, 'B'), (True, 'test'), (False, None)])
@@ -135,7 +133,7 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
             with pytest.raises(KeyError, match=msg):
                 df.set_index(key, drop=drop, append=append)
         else:
-            # np.array/list-of-list "forget" the name of B
+            # np.array "forgets" the name of B
             name_mi = getattr(key, 'names', None)
             name = [getattr(key, 'name', None)] if name_mi is None else name_mi
 
@@ -163,9 +161,13 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
 
         keys = ['A', box(df['B'])]
         # np.array/list "forget" the name of B
-        names = ['A', None if box in [np.array, list, tuple, iter] else 'B']
+        names = ['A', None if box in [np.array, list] else 'B']
 
-        result = df.set_index(keys, drop=drop, append=append)
+        if box == list:
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.set_index(keys, drop=drop, append=append)
+        else:
+            result = df.set_index(keys, drop=drop, append=append)
 
         # only valid column keys are dropped
         # since B is always passed as array above, only A is dropped, if at all
@@ -193,7 +195,12 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         df.index.name = index_name
 
         keys = [box1(df['A']), box2(df['A'])]
-        result = df.set_index(keys, drop=drop, append=append)
+
+        if box1 == list or box2 == list:
+            with tm.assert_produces_warning(FutureWarning):
+                result = df.set_index(keys, drop=drop, append=append)
+        else:
+            result = df.set_index(keys, drop=drop, append=append)
 
         # if either box is iter, it has been consumed; re-read
         keys = [box1(df['A']), box2(df['A'])]
@@ -206,8 +213,16 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         # to test against already-tested behaviour, we add sequentially,
         # hence second append always True; must wrap keys in list, otherwise
         # box = list would be interpreted as keys
-        expected = df.set_index([keys[0]], drop=first_drop, append=append)
-        expected = expected.set_index([keys[1]], drop=drop, append=True)
+        if box1 == list or box2 == list:
+            with tm.assert_produces_warning(FutureWarning):
+                expected = df.set_index([keys[0]], drop=first_drop,
+                                        append=append)
+                expected = expected.set_index([keys[1]], drop=drop,
+                                              append=True)
+        else:
+            expected = df.set_index([keys[0]], drop=first_drop, append=append)
+            expected = expected.set_index([keys[1]], drop=drop, append=True)
+
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize('append', [True, False])