From d0fe636b7357bf16d17e33dc1dd09ad25fd63a16 Mon Sep 17 00:00:00 2001
From: William Ayd <william.ayd@gmail.com>
Date: Wed, 30 Oct 2019 11:29:07 -0700
Subject: [PATCH] Fixed segfaults and incorrect results in GroupBy.quantile
 with NA Values in Grouping (#29173)

---
 doc/source/whatsnew/v1.0.0.rst        |  1 +
 pandas/_libs/groupby.pyx              |  3 +++
 pandas/tests/groupby/test_function.py | 23 +++++++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index fa1669b1f3343..a9218650d4fe7 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -411,6 +411,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
 - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
+- Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 71de80da699e9..b4300c162156f 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -766,6 +766,9 @@ def group_quantile(ndarray[float64_t] out,
     with nogil:
         for i in range(N):
             lab = labels[i]
+            if lab == -1:  # NA group label
+                continue
+
             counts[lab] += 1
             if not mask[i]:
                 non_na_counts[lab] += 1
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 571e710ba8928..2d7dfe49dc038 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1373,6 +1373,29 @@ def test_quantile_out_of_bounds_q_raises():
         g.quantile(-1)
 
 
+def test_quantile_missing_group_values_no_segfaults():
+    # GH 28662: a NaN among the group keys must not crash quantile()
+    data = np.array([1.0, np.nan, 1.0])
+    df = pd.DataFrame(dict(key=data, val=range(3)))
+
+    # Segfaults were intermittent; repeating the call made one near-certain pre-fix
+    grp = df.groupby("key")
+    for _ in range(100):
+        grp.quantile()
+
+
+def test_quantile_missing_group_values_correct_results():
+    # GH 28662: vals on NaN-keyed rows (1 and 3) must not leak into other groups
+    data = np.array([1.0, np.nan, 3.0, np.nan])
+    df = pd.DataFrame(dict(key=data, val=range(4)))
+    # Each real key has a single row (val 0 and val 2), so that val is its quantile
+    result = df.groupby("key").quantile()
+    expected = pd.DataFrame(
+        [0.0, 2.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 # pipe
 # --------------------------------