Skip to content

Commit

Permalink
BUG: Fix quantile calculation. Only move -1 labels if there are any l…
Browse files Browse the repository at this point in the history
  • Loading branch information
mabelvj committed Apr 26, 2020
1 parent 8e6af0e commit b8a19bf
Showing 1 changed file with 39 additions and 37 deletions.
76 changes: 39 additions & 37 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -779,47 +779,49 @@ def group_quantile(ndarray[float64_t] out,
non_na_counts[lab] += 1

if labels.any():
# Get an index of values sorted by labels and then values
# Put '-1' (NaN) labels as the last group so it does not interfere
# with the calculations.
labels[labels==-1] = np.max(labels) + 1
order = (values, labels)
sort_arr= np.lexsort(order).astype(np.int64, copy=False)
with nogil:
for i in range(ngroups):
# Figure out how many group elements there are
grp_sz = counts[i]
non_na_sz = non_na_counts[i]
# Get an index of values sorted by labels and then values
order = (values, labels)
sort_arr = np.lexsort(order).astype(np.int64, copy=False)
with nogil:
for i in range(ngroups):
# Figure out how many group elements there are
grp_sz = counts[i]
non_na_sz = non_na_counts[i]

if non_na_sz == 0:
out[i] = NaN
if non_na_sz == 0:
out[i] = NaN
else:
# Calculate where to retrieve the desired value
# Casting to int will intentionally truncate result
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))

val = values[sort_arr[idx]]
# If requested quantile falls evenly on a particular index
# then write that index's value out. Otherwise interpolate
q_idx = q * (non_na_sz - 1)
frac = q_idx % 1

if frac == 0.0 or interp == INTERPOLATION_LOWER:
out[i] = val
else:
# Calculate where to retrieve the desired value
# Casting to int will intentionally truncate result
idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))

val = values[sort_arr[idx]]
# If requested quantile falls evenly on a particular index
# then write that index's value out. Otherwise interpolate
q_idx = q * (non_na_sz - 1)
frac = q_idx % 1

if frac == 0.0 or interp == INTERPOLATION_LOWER:
out[i] = val
else:
next_val = values[sort_arr[idx + 1]]
if interp == INTERPOLATION_LINEAR:
out[i] = val + (next_val - val) * frac
elif interp == INTERPOLATION_HIGHER:
next_val = values[sort_arr[idx + 1]]
if interp == INTERPOLATION_LINEAR:
out[i] = val + (next_val - val) * frac
elif interp == INTERPOLATION_HIGHER:
out[i] = next_val
elif interp == INTERPOLATION_MIDPOINT:
out[i] = (val + next_val) / 2.0
elif interp == INTERPOLATION_NEAREST:
if frac > .5 or (frac == .5 and q > .5): # Always OK?
out[i] = next_val
elif interp == INTERPOLATION_MIDPOINT:
out[i] = (val + next_val) / 2.0
elif interp == INTERPOLATION_NEAREST:
if frac > .5 or (frac == .5 and q > .5): # Always OK?
out[i] = next_val
else:
out[i] = val

# Increment the index reference in sorted_arr for the next group
grp_start += grp_sz
else:
out[i] = val

# Increment the index reference in sorted_arr for the next group
grp_start += grp_sz


# ----------------------------------------------------------------------
Expand Down

0 comments on commit b8a19bf

Please sign in to comment.