Correctly concatenate pandas.Index objects #875

Merged · 7 commits · Jun 16, 2016
Changes from 4 commits
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -59,6 +59,10 @@ Bug fixes
  ``keep_attrs=True`` option. By
  `Jeremy McGibbon <https://github.com/mcgibbon>`_.

- Concatenating xarray objects along an axis with a MultiIndex or PeriodIndex
  preserves the nature of the index (:issue:`875`). By
  `Stephan Hoyer <https://github.com/shoyer>`_.

- ``decode_cf_timedelta`` now accepts arrays with ``ndim`` >1 (:issue:`842`).
  This fixes issue :issue:`665`.
  `Filipe Fernandes <https://github.com/ocefpaf>`_.
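
As a quick illustration of the :issue:`875` entry above, a minimal sketch assuming xarray's public concat API (the data here is hypothetical, and exact types may vary by version):

import pandas as pd
import xarray as xr

# Hypothetical data: two DataArrays over consecutive monthly periods.
a = xr.DataArray([1, 2], [('time', pd.period_range('2000-01', periods=2, freq='M'))])
b = xr.DataArray([3, 4], [('time', pd.period_range('2000-03', periods=2, freq='M'))])

combined = xr.concat([a, b], dim='time')
# With this fix, the concatenated coordinate remains a PeriodIndex instead
# of degrading to a plain object-dtype Index.
print(type(combined['time'].to_index()))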
4 changes: 2 additions & 2 deletions xarray/core/combine.py
@@ -4,7 +4,7 @@

 from . import utils
 from .pycompat import iteritems, reduce, OrderedDict, basestring
-from .variable import Variable, as_variable, Coordinate
+from .variable import Variable, as_variable, Coordinate, concat as concat_vars


def concat(objs, dim=None, data_vars='all', coords='different',
@@ -265,7 +265,7 @@ def ensure_common_dims(vars):
     # stack up each variable to fill-out the dataset
     for k in concat_over:
         vars = ensure_common_dims([ds.variables[k] for ds in datasets])
-        combined = Variable.concat(vars, dim, positions)
+        combined = concat_vars(vars, dim, positions)
         insert_result_variable(k, combined)

     result = Dataset(result_vars, attrs=result_attrs)
74 changes: 67 additions & 7 deletions xarray/core/groupby.py
@@ -2,6 +2,7 @@
 import numpy as np
 import pandas as pd

+from . import nputils
 from . import ops
 from .combine import concat
 from .common import (
@@ -66,6 +67,52 @@ def _dummy_copy(xarray_obj):
        raise AssertionError
    return res


def _is_one_or_none(obj):
    return obj == 1 or obj is None
Member:
do you want to treat True as 1? If not, this should be obj is 1 or obj is None.

Collaborator:
Interesting! I never knew that.
I always thought ints & strings should use ==, but for -1 to 256 is is fine: http://stackoverflow.com/questions/2239737/is-it-better-to-use-is-or-for-number-comparison-in-python

Member:
But this depends on the interpreter, then. CPython might work, but others might not I guess?

Member (author):
These slice objects are generated internally by groupby, so I think it's pretty far-fetched to get step is True. Using == feels more idiomatic, anyways.
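
For reference, a quick illustration of the semantics under discussion (small-integer identity is a CPython implementation detail, not a language guarantee):

# True compares equal to 1, so == also matches a boolean step:
print(True == 1)    # True
print(True is 1)    # False

# CPython interns small integers (-5 through 256), which is why `is`
# happens to work for them there; other interpreters need not do this.
a, b = 256, 256
print(a is b)       # True on CPython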



def _consolidate_slices(slices):
    """Consolidate adjacent slices in a list of slices.
    """
    result = []
    for slice_ in slices:
        if not isinstance(slice_, slice):
            raise ValueError('list element is not a slice: %r' % slice_)
        if (result and last_slice.stop == slice_.start
                and _is_one_or_none(last_slice.step)
                and _is_one_or_none(slice_.step)):
            last_slice = slice(last_slice.start, slice_.stop, slice_.step)
            result[-1] = last_slice
        else:
            result.append(slice_)
            last_slice = slice_
    return result
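
A quick sketch of the consolidation behavior, assuming the helper as defined above:

# Adjacent slices with unit (or None) steps merge into one:
_consolidate_slices([slice(0, 3), slice(3, 5)])
# -> [slice(0, 5, None)]

# A gap or a non-unit step blocks merging:
_consolidate_slices([slice(0, 3), slice(4, 6)])
# -> [slice(0, 3, None), slice(4, 6, None)]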


def _inverse_permutation_indices(positions):
    """Like inverse_permutation, but also handles slices.

    Parameters
    ----------
    positions : list of np.ndarray or slice objects.
        If slice objects, all are assumed to be slices.

    Returns
    -------
    np.ndarray of indices or None, if no permutation is necessary.
    """
    if not positions:
        return None

    if isinstance(positions[0], slice):
        positions = _consolidate_slices(positions)
        if positions == [slice(None)]:
            return None
        positions = [np.arange(sl.start, sl.stop, sl.step) for sl in positions]

    indices = nputils.inverse_permutation(np.concatenate(positions))
    return indices
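
For example, a sketch with out-of-order groups (using the helpers above):

import numpy as np

# Groups were taken out of order; positions record where each group's
# elements sat in the original array.
positions = [np.array([2, 3]), np.array([0, 1])]
_inverse_permutation_indices(positions)
# -> array([2, 3, 0, 1]); indexing the concatenated result with this
#    restores the original element order.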


class GroupBy(object):
    """An object that implements the split-apply-combine pattern.
@@ -302,6 +349,16 @@ def assign_coords(self, **kwargs):
        return self.apply(lambda ds: ds.assign_coords(**kwargs))


def _maybe_reorder(xarray_obj, concat_dim, positions):
    order = _inverse_permutation_indices(positions)

    if order is None:
        return xarray_obj
    else:
        dim, = concat_dim.dims
        return xarray_obj[{dim: order}]


class DataArrayGroupBy(GroupBy, ImplementsArrayReduce):
    """GroupBy object specialized to grouping DataArray objects
    """
@@ -313,14 +370,14 @@ def _iter_grouped_shortcut(self):
        for indices in self.group_indices:
            yield var[{self.group_dim: indices}]

-    def _concat_shortcut(self, applied, concat_dim, positions):
+    def _concat_shortcut(self, applied, concat_dim, positions=None):
         # nb. don't worry too much about maintaining this method -- it does
         # speed things up, but it's not very interpretable and there are much
         # faster alternatives (e.g., doing the grouped aggregation in a
         # compiled language)
-        stacked = Variable.concat(
-            applied, concat_dim, positions, shortcut=True)
-        result = self.obj._replace_maybe_drop_dims(stacked)
+        stacked = Variable.concat(applied, concat_dim, shortcut=True)
+        reordered = _maybe_reorder(stacked, concat_dim, positions)
+        result = self.obj._replace_maybe_drop_dims(reordered)
         result._coords[concat_dim.name] = as_variable(concat_dim, copy=True)
         return result

@@ -391,7 +448,8 @@ def _concat(self, applied, shortcut=False):
         if shortcut:
             combined = self._concat_shortcut(applied, concat_dim, positions)
         else:
-            combined = concat(applied, concat_dim, positions=positions)
+            combined = concat(applied, concat_dim)
+            combined = _maybe_reorder(combined, concat_dim, positions)

        if isinstance(combined, type(self.obj)):
            combined = self._restore_dim_order(combined)
@@ -472,8 +530,10 @@ def apply(self, func, **kwargs):
     def _concat(self, applied):
         applied_example, applied = peek_at(applied)
         concat_dim, positions = self._infer_concat_args(applied_example)
-        combined = concat(applied, concat_dim, positions=positions)
-        return combined
+        combined = concat(applied, concat_dim)
+        reordered = _maybe_reorder(combined, concat_dim, positions)
+        return reordered
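
Both _concat paths now follow the same pattern: concatenate the applied pieces in group order, then invert the permutation so elements return to their original positions. A standalone numpy sketch of that pattern, with hypothetical data and no xarray dependencies:

import numpy as np

x = np.array([10, 11, 12, 13])
group_positions = [np.array([1, 3]), np.array([0, 2])]   # e.g. odd/even labels

# Apply: transform each group, then stack the results in group order.
applied = [x[pos] * 100 for pos in group_positions]
stacked = np.concatenate(applied)        # [1100, 1300, 1000, 1200]

# Reorder: scatter arange through the concatenated positions to invert.
order = np.empty(len(x), dtype=np.int64)
order[np.concatenate(group_positions)] = np.arange(len(x))
print(stacked[order])                    # [1000 1100 1200 1300]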

    def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
        """Reduce the items in this group by applying `func` along some
35 changes: 17 additions & 18 deletions xarray/core/nputils.py
@@ -34,24 +34,23 @@ def nanlast(values, axis):
    return _select_along_axis(values, idx_last, axis)


-def _calc_concat_shape(arrays, axis=0):
-    first_shape = arrays[0].shape
-    length = builtins.sum(a.shape[axis] for a in arrays)
-    result_shape = first_shape[:axis] + (length,) + first_shape[(axis + 1):]
-    return result_shape
-
-
-def interleaved_concat(arrays, indices, axis=0):
-    arrays = [np.asarray(a) for a in arrays]
-    axis = _validate_axis(arrays[0], axis)
-    result_shape = _calc_concat_shape(arrays, axis=axis)
-    dtype = reduce(np.promote_types, [a.dtype for a in arrays])
-    result = np.empty(result_shape, dtype)
-    key = [slice(None)] * result.ndim
-    for a, ind in zip(arrays, indices):
-        key[axis] = ind
-        result[key] = a
-    return result
+def inverse_permutation(indices):
+    """Return indices for an inverse permutation.
+
+    Parameters
+    ----------
+    indices : 1D np.ndarray with dtype=int
+        Integer positions to assign elements to.
+
+    Returns
+    -------
+    inverse_permutation : 1D np.ndarray with dtype=int
+        Integer indices to take from the original array to create the
+        permutation.
+    """
+    inverse_permutation = np.empty(len(indices), dtype=np.int64)
+    inverse_permutation[indices] = np.arange(len(indices))
+    return inverse_permutation
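
A short check of the round trip this provides, assuming the function above:

import numpy as np

indices = np.array([2, 0, 3, 1])
inv = inverse_permutation(indices)       # array([1, 3, 0, 2])

x = np.array(['a', 'b', 'c', 'd'])
print(x[indices][inv])                   # ['a' 'b' 'c' 'd'] -- original order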


def _ensure_bool_is_ndarray(result, *args):
66 changes: 1 addition & 65 deletions xarray/core/ops.py
@@ -9,10 +9,7 @@

from . import npcompat
from .pycompat import PY3, dask_array_type
from .nputils import (
nanfirst, nanlast, interleaved_concat as _interleaved_concat_numpy,
array_eq, array_ne, _validate_axis, _calc_concat_shape
)
from .nputils import nanfirst, nanlast, array_eq, array_ne


try:
@@ -100,67 +97,6 @@ def _fail_on_dask_array_input(values, msg=None, func_name=None):
tensordot = _dask_or_eager_func('tensordot', n_array_args=2)


-def _interleaved_indices_required(indices):
-    """With dask, we care about data locality and would rather avoid
-    splitting up each array into single elements. This routine checks to see
-    if we really need the "interleaved" part of interleaved_concat.
-
-    We don't use this for the pure numpy version of interleaved_concat,
-    because it's just as fast or faster to directly do the interleaved
-    concatenate rather than check if we could simplify it.
-    """
-    next_expected = 0
-    for ind in indices:
-        if isinstance(ind, slice):
-            if ((ind.start or 0) != next_expected or
-                    ind.step not in (1, None)):
-                return True
-            next_expected = ind.stop
-        else:
-            ind = np.asarray(ind)
-            expected = np.arange(next_expected, next_expected + ind.size)
-            if (ind != expected).any():
-                return True
-            next_expected = ind[-1] + 1
-    return False
-
-
-def _interleaved_concat_slow(arrays, indices, axis=0):
-    """A slow version of interleaved_concat that also works on dask arrays
-    """
-    axis = _validate_axis(arrays[0], axis)
-
-    result_shape = _calc_concat_shape(arrays, axis=axis)
-    length = result_shape[axis]
-    array_lookup = np.empty(length, dtype=int)
-    element_lookup = np.empty(length, dtype=int)
-
-    for n, ind in enumerate(indices):
-        if isinstance(ind, slice):
-            ind = np.arange(*ind.indices(length))
-        for m, i in enumerate(ind):
-            array_lookup[i] = n
-            element_lookup[i] = m
-
-    split_arrays = [arrays[n][(slice(None),) * axis + (slice(m, m + 1),)]
-                    for (n, m) in zip(array_lookup, element_lookup)]
-    return concatenate(split_arrays, axis)
-
-
-def interleaved_concat(arrays, indices, axis=0):
-    """Concatenate each array along the given axis, but also assign each array
-    element into the location given by indices. This operation is used for
-    groupby.transform.
-    """
-    if has_dask and isinstance(arrays[0], da.Array):
-        if not _interleaved_indices_required(indices):
-            return da.concatenate(arrays, axis)
-        else:
-            return _interleaved_concat_slow(arrays, indices, axis)
-    else:
-        return _interleaved_concat_numpy(arrays, indices, axis)


def asarray(data):
    return data if isinstance(data, dask_array_type) else np.asarray(data)
