From 37112839de1647f2da9ad02e7175fdd45f182a87 Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 10:35:30 -0700
Subject: [PATCH 1/6] preliminary setup for arg_equal_1d

---
 src/__init__.py  |  1 +
 src/__init__.pyi |  1 +
 src/_arraykit.c  | 31 +++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/src/__init__.py b/src/__init__.py
index b88ea42c..62d2af5a 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -29,3 +29,4 @@
 from ._arraykit import first_true_2d as first_true_2d
 from ._arraykit import slice_to_ascending_slice as slice_to_ascending_slice
 from ._arraykit import nonzero_1d as nonzero_1d
+from ._arraykit import arg_equal_1d as arg_equal_1d
diff --git a/src/__init__.pyi b/src/__init__.pyi
index 16805f91..34088ae2 100644
--- a/src/__init__.pyi
+++ b/src/__init__.pyi
@@ -160,4 +160,5 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) ->
 def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
 def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
 def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ...
+def arg_equal_1d(__array: np.ndarray, __value: tp.Any, /) -> np.ndarray: ...
 def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ...
diff --git a/src/_arraykit.c b/src/_arraykit.c
index e5947966..c6f4c77f 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -3672,6 +3672,36 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
 }
 
 
+//------------------------------------------------------------------------------
+
+static inline PyObject*
+AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
+    Py_RETURN_NONE;
+}
+
+
+static PyObject*
+arg_equal_1d(PyObject *Py_UNUSED(m), PyObject *args) {
+    PyArrayObject* array;
+    PyObject* value;
+    if (!PyArg_ParseTuple(args,
+            "O!O:arg_equal_1d",
+            &PyArray_Type, &array,
+            &value)) {
+        return NULL;
+    }
+
+    if (PyArray_NDIM(array) != 1) {
+        PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
+        return NULL;
+    }
+    return AK_arg_equal_1d(array, value);
+}
+
+
+
+//------------------------------------------------------------------------------
+
 static char *first_true_1d_kwarg_names[] = {
     "array",
     "forward",
@@ -7310,6 +7340,7 @@ static PyMethodDef arraykit_methods[] =  {
             NULL},
     {"count_iteration", count_iteration, METH_O, NULL},
     {"nonzero_1d", nonzero_1d, METH_O, NULL},
+    {"arg_equal_1d", arg_equal_1d, METH_VARARGS, NULL},
     {"isna_element",
             (PyCFunction)isna_element,
             METH_VARARGS | METH_KEYWORDS,

From 34a2153759babc9077298e20b8bc1dc9957f9f43 Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 10:44:04 -0700
Subject: [PATCH 2/6] preliminary interface tests

---
 test/test_arg_equal_1d.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 test/test_arg_equal_1d.py

diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py
new file mode 100644
index 00000000..322155a6
--- /dev/null
+++ b/test/test_arg_equal_1d.py
@@ -0,0 +1,14 @@
+import unittest
+import numpy as np
+
+from arraykit import arg_equal_1d
+
+class TestUnit(unittest.TestCase):
+
+    def test_arg_equal_1d_a1(self) -> None:
+        a = np.arange(6).reshape(2, 3)
+        with self.assertRaises(TypeError):
+            arg_equal_1d(a)
+
+        with self.assertRaises(ValueError):
+            arg_equal_1d(a, None)

From 48d257d9ddf7a7a377e8df9508b835177d03c7cd Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 11:52:25 -0700
Subject: [PATCH 3/6] preliminary sketch

---
 src/_arraykit.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/src/_arraykit.c b/src/_arraykit.c
index c6f4c77f..1e98d867 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -3674,9 +3674,65 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
 
 //------------------------------------------------------------------------------
 
+#define NONZERO_APPEND_INDEX_RELATIVE {                                      \
+    if (AK_UNLIKELY(count == capacity)) {                                    \
+        capacity <<= 1;                                                      \
+        indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
+        if (indices == NULL) {                                               \
+            return NULL;                                                     \
+        }                                                                    \
+    }                                                                        \
+    indices[count++] = p - p_start;                                          \
+}                                                                            \
+
+
+
 static inline PyObject*
 AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
-    Py_RETURN_NONE;
+    PyObject* final;
+    npy_intp count_max = PyArray_SIZE(array);
+    if (count_max == 0) { // return empty array
+        npy_intp dims = {count_max};
+        final = PyArray_SimpleNew(1, &dims, NPY_INT64);
+        PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
+        return final;
+    }
+
+    // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem
+    Py_ssize_t count = 0;
+    // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
+    Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8;
+    npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity);
+
+    switch (PyArray_TYPE(array)) { // type of passed in array
+        case NPY_INT64: {
+            npy_intp i = 0; // position within Boolean array
+            for (npy_intp i = 0; i < count_max; i++) {
+                if (*(npy_int64*)PyArray_GETPTR1(array, i) == 0) {
+                    if (AK_UNLIKELY(count == capacity)) {
+                        capacity <<= 1;
+                        indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
+                        if (indices == NULL) {
+                            return NULL;
+                        }
+                    }
+                    indices[count++] = i;
+                }
+            }
+        }
+    }
+
+    npy_intp dims = {count};
+    final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices);
+    if (!final) {
+        free(indices);
+        return NULL;
+    }
+    // This ensures that the array frees the indices array; this has been tested by calling free(indices) and observing segfault
+    PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA);
+    PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
+    return final;
+
 }
 
 

From 652e314c32993215d55c0a3a14d23135345bc8f6 Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 13:00:15 -0700
Subject: [PATCH 4/6] improvements to nonzero_1d, arg_equal_1d

---
 src/_arraykit.c           | 137 ++++++++++++++++++++++++++++++--------
 test/test_arg_equal_1d.py |  15 +++++
 2 files changed, 124 insertions(+), 28 deletions(-)

diff --git a/src/_arraykit.c b/src/_arraykit.c
index 1e98d867..77a3eddc 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -3562,13 +3562,11 @@ static inline PyObject*
 AK_nonzero_1d(PyArrayObject* array) {
     // the maxiumum number of indices we could return is the size of the array; if this is under a certain number, probably better to just allocate that rather than reallocate
     PyObject* final;
+    npy_intp dims[1] = {0}; // update later
     npy_intp count_max = PyArray_SIZE(array);
 
     if (count_max == 0) { // return empty array
-        npy_intp dims = {count_max};
-        final = PyArray_SimpleNew(1, &dims, NPY_INT64);
-        PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
-        return final;
+        goto empty;
     }
     lldiv_t size_div = lldiv((long long)count_max, 8); // quot, rem
 
@@ -3642,8 +3640,12 @@ AK_nonzero_1d(PyArrayObject* array) {
     }
     NPY_END_THREADS;
 
-    npy_intp dims = {count};
-    final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices);
+    if (count == 0) {
+        free(indices);
+        goto empty;
+    }
+    dims[0] = count;
+    final = PyArray_SimpleNewFromData(1, dims, NPY_INT64, (void*)indices);
     if (!final) {
         free(indices);
         return NULL;
@@ -3652,6 +3654,10 @@ AK_nonzero_1d(PyArrayObject* array) {
     PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA);
     PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
     return final;
+empty:
+    final = PyArray_SimpleNew(1, dims, NPY_INT64);
+    PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
+    return final;
 }
 #undef NONZERO_APPEND_INDEX_RELATIVE
 #undef NONZERO_APPEND_INDEX_ABSOLUTE
@@ -3674,28 +3680,93 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
 
 //------------------------------------------------------------------------------
 
-#define NONZERO_APPEND_INDEX_RELATIVE {                                      \
-    if (AK_UNLIKELY(count == capacity)) {                                    \
-        capacity <<= 1;                                                      \
-        indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\
-        if (indices == NULL) {                                               \
-            return NULL;                                                     \
-        }                                                                    \
-    }                                                                        \
-    indices[count++] = p - p_start;                                          \
-}                                                                            \
-
-
+static npy_int64
+AK_obj_to_int(PyObject* obj, bool* error) {
+    npy_int64 v = 0;
+    *error = false;
+    if (PyArray_IsScalar(obj, LongLong)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, LongLong);
+    }
+    else if (PyArray_IsScalar(obj, Long)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, Long);
+    }
+    else if (PyLong_Check(obj)) {
+        v = PyLong_AsLongLong(obj);
+        if (v == -1 && PyErr_Occurred()) {
+            PyErr_Clear();
+            *error = true;
+        }
+    }
+    else if (PyArray_IsScalar(obj, Double)) {
+        double dv = PyArrayScalar_VAL(obj, Double);
+        if (floor(dv) != dv) {
+            *error = true;
+        }
+        v = (npy_int64)dv;
+    }
+    else if (PyFloat_Check(obj)) {
+        double dv = PyFloat_AsDouble(obj);
+        if (dv == -1.0 && PyErr_Occurred()) {
+            PyErr_Clear();
+            *error = true;
+        }
+        v = (npy_int64)dv; // truncate to integer
+        if (v != dv) {
+            *error = true;
+        }
+    }
+    else if (PyArray_IsScalar(obj, ULongLong)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, ULongLong);
+    }
+    else if (PyArray_IsScalar(obj, ULong)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, ULong);
+    }
+    else if (PyArray_IsScalar(obj, Int)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, Int);
+    }
+    else if (PyArray_IsScalar(obj, UInt)) {
+        v = (npy_int64)PyArrayScalar_VAL(obj, UInt);
+    }
+    else if (PyArray_IsScalar(obj, Float)) {
+        double dv = (double)PyArrayScalar_VAL(obj, Float);
+        if (floor(dv) != dv) {
+            *error = true;
+        }
+        v = (npy_int64)dv;
+    }
+    else if (PyArray_IsScalar(obj, Half)) {
+        double dv = npy_half_to_double(PyArrayScalar_VAL(obj, Half));
+        if (floor(dv) != dv) {
+            *error = true;
+        }
+        v = (npy_int64)dv;
+    }
+    else if (PyBool_Check(obj)) {
+        v = PyObject_IsTrue(obj);
+    }
+    else if (PyNumber_Check(obj)) {
+        // NOTE: PyArray Scalar Byte, Short, UByte, UShort are handled by this PyNumber_Check branch, saving four dedicated branches
+        // NOTE: this returns a Py_ssize_t, which might be 32 bit. This can be used for PyArray_Scalars <= ssize_t.
+        v = (npy_int64)PyNumber_AsSsize_t(obj, PyExc_OverflowError);
+        if (v == -1 && PyErr_Occurred()) {
+            *error = true;
+        }
+    }
+    else {
+        *error = true;
+    }
+    return v;
+}
 
 static inline PyObject*
 AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
     PyObject* final;
+    npy_intp dims[1] = {0}; // update later
     npy_intp count_max = PyArray_SIZE(array);
+    bool error;
+
     if (count_max == 0) { // return empty array
-        npy_intp dims = {count_max};
-        final = PyArray_SimpleNew(1, &dims, NPY_INT64);
-        PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
-        return final;
+        goto empty;
     }
 
     // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem
@@ -3706,9 +3777,13 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
 
     switch (PyArray_TYPE(array)) { // type of passed in array
         case NPY_INT64: {
-            npy_intp i = 0; // position within Boolean array
+            // try to convert the object to an int; if not possible, no matches
+            npy_int64 v = AK_obj_to_int(value, &error);
+            if (error) {
+                goto empty;
+            }
             for (npy_intp i = 0; i < count_max; i++) {
-                if (*(npy_int64*)PyArray_GETPTR1(array, i) == 0) {
+                if (*(npy_int64*)PyArray_GETPTR1(array, i) == v) {
                     if (AK_UNLIKELY(count == capacity)) {
                         capacity <<= 1;
                         indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
@@ -3721,9 +3796,12 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
             }
         }
     }
-
-    npy_intp dims = {count};
-    final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices);
+    if (count == 0) {
+        free(indices);
+        goto empty;
+    }
+    dims[0] = count;
+    final = PyArray_SimpleNewFromData(1, dims, NPY_INT64, (void*)indices);
     if (!final) {
         free(indices);
         return NULL;
@@ -3732,7 +3810,10 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
     PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA);
     PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
     return final;
-
+empty:
+    final = PyArray_SimpleNew(1, dims, NPY_INT64); // dims set to 0
+    PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE);
+    return final;
 }
 
 
diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py
index 322155a6..af4467ad 100644
--- a/test/test_arg_equal_1d.py
+++ b/test/test_arg_equal_1d.py
@@ -12,3 +12,18 @@ def test_arg_equal_1d_a1(self) -> None:
 
         with self.assertRaises(ValueError):
             arg_equal_1d(a, None)
+
+
+    def test_arg_equal_1d_int_a(self) -> None:
+        a = np.array([4, 0, 4, 0, 5, 8, 0])
+        self.assertEqual(arg_equal_1d(a, 0).tolist(), [1, 3, 6])
+        self.assertEqual(arg_equal_1d(a, 4).tolist(), [0, 2])
+
+    def test_arg_equal_1d_int_b(self) -> None:
+        a = np.arange(100_000)
+        self.assertEqual(arg_equal_1d(a, a[99_999]).tolist(), [99_999])
+        self.assertEqual(arg_equal_1d(a, a[99_999]).tolist(), [99_999])
+
+    def test_arg_equal_1d_int_c(self) -> None:
+        a = np.array([4, 0, 4, 0, 5, 8, 0])
+        self.assertEqual(arg_equal_1d(a, 20).tolist(), [])

From c9d3378139d43a473b5cffa6c413a8e03d051783 Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 13:18:43 -0700
Subject: [PATCH 5/6] tests

---
 src/_arraykit.c           |  1 +
 test/test_arg_equal_1d.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/src/_arraykit.c b/src/_arraykit.c
index 77a3eddc..cbb036b4 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -3684,6 +3684,7 @@ static npy_int64
 AK_obj_to_int(PyObject* obj, bool* error) {
     npy_int64 v = 0;
     *error = false;
+
     if (PyArray_IsScalar(obj, LongLong)) {
         v = (npy_int64)PyArrayScalar_VAL(obj, LongLong);
     }
diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py
index af4467ad..8838ac44 100644
--- a/test/test_arg_equal_1d.py
+++ b/test/test_arg_equal_1d.py
@@ -27,3 +27,21 @@ def test_arg_equal_1d_int_b(self) -> None:
     def test_arg_equal_1d_int_c(self) -> None:
         a = np.array([4, 0, 4, 0, 5, 8, 0])
         self.assertEqual(arg_equal_1d(a, 20).tolist(), [])
+
+    def test_arg_equal_1d_int_d(self) -> None:
+        a = np.array([4, 0, 4, 0, 5, 8, 0])
+        self.assertEqual(arg_equal_1d(a, "foo").tolist(), [])
+
+    def test_arg_equal_1d_int_d2(self) -> None:  # renamed: a second `_int_d` silently replaced the "foo" test above
+        a = np.array([4, 0, 4, 0, 5, 8, 0])
+        self.assertEqual(arg_equal_1d(a, None).tolist(), [])  # None is not convertible to an int, so no matches
+
+    def test_arg_equal_1d_int_e(self) -> None:
+        # NOTE: this is consistent with numpy
+        a = np.array([4, 0, 4, 0, 5, 8, 0])
+        self.assertEqual(arg_equal_1d(a, False).tolist(), [1, 3, 6])
+
+    def test_arg_equal_1d_int_e(self) -> None:
+        # NOTE: this is consistent with numpy
+        a = np.array([4, 0, 4, 0, 5, 8, 1])
+        self.assertEqual(arg_equal_1d(a, True).tolist(), [6])

From 83a340cffc9b10826f59b4aed6f604e41ad16832 Mon Sep 17 00:00:00 2001
From: Christopher Ariza <ariza@flexatone.com>
Date: Thu, 6 Jun 2024 18:03:49 -0700
Subject: [PATCH 6/6] additional sketching

---
 doc/articles/arg_equal_1d.py | 257 +++++++++++++++++++++++++++++++++++
 src/_arraykit.c              |  75 ++++++++--
 test/test_arg_equal_1d.py    |   2 +-
 3 files changed, 320 insertions(+), 14 deletions(-)
 create mode 100644 doc/articles/arg_equal_1d.py

diff --git a/doc/articles/arg_equal_1d.py b/doc/articles/arg_equal_1d.py
new file mode 100644
index 00000000..123024a2
--- /dev/null
+++ b/doc/articles/arg_equal_1d.py
@@ -0,0 +1,257 @@
+
+
+
+import os
+import sys
+import timeit
+import typing as tp
+
+from arraykit import arg_equal_1d
+import arraykit as ak
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+sys.path.append(os.getcwd())
+
+
+
+class ArrayProcessor:
+    NAME = ''
+    SORT = -1
+
+    def __init__(self, array: np.ndarray):
+        self.array = array
+
+#-------------------------------------------------------------------------------
+class AKArgEqual(ArrayProcessor):
+    NAME = 'ak.arg_equal_1d()'
+    SORT = 0
+
+    def __call__(self):
+        _ = arg_equal_1d(self.array, -1)
+
+class NPEq(ArrayProcessor):
+    NAME = 'np.__eq__()'
+    SORT = 1
+
+    def __call__(self):
+        _ = self.array == -1
+
+class NPEqNonZero(ArrayProcessor):
+    NAME = 'np.nonzero(np.__eq__())[0]'
+    SORT = 3
+
+    def __call__(self):
+        e = self.array == -1
+        _ = np.nonzero(e)[0]
+
+
+#-------------------------------------------------------------------------------
+NUMBER = 200
+
+def seconds_to_display(seconds: float) -> str:
+    seconds /= NUMBER
+    if seconds < 1e-4:
+        return f'{seconds * 1e6: .1f} (µs)'
+    if seconds < 1e-1:
+        return f'{seconds * 1e3: .1f} (ms)'
+    return f'{seconds: .1f} (s)'
+
+
+def plot_performance(frame):
+    fixture_total = len(frame['fixture'].unique())
+    cat_total = len(frame['size'].unique())
+    processor_total = len(frame['cls_processor'].unique())
+    fig, axes = plt.subplots(cat_total, fixture_total)
+
+    # cmap = plt.get_cmap('terrain')
+    cmap = plt.get_cmap('plasma')
+
+    color = cmap(np.arange(processor_total) / max(processor_total, 3))
+
+    # category is the size of the array
+    for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')):
+        # each fixture is a collection of tests for one display
+        fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')}
+        for fixture_count, (fixture_label, fixture) in enumerate(fixtures.items()):
+            ax = axes[cat_count][fixture_count]
+
+            # set order
+            fixture['sort'] = [f.SORT for f in fixture['cls_processor']]
+            fixture = fixture.sort_values('sort')
+
+            results = fixture['time'].values.tolist()
+            names = [cls.NAME for cls in fixture['cls_processor']]
+            # x = np.arange(len(results))
+            names_display = names
+            post = ax.bar(names_display, results, color=color)
+
+            # density, position = fixture_label.split('-')
+            # cat_label is the size of the array
+            title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}'
+
+            ax.set_title(title, fontsize=6)
+            ax.set_box_aspect(0.75) # box aspect is height / width: axes are wider than tall
+            time_max = fixture['time'].max()
+            ax.set_yticks([0, time_max * 0.5, time_max])
+            ax.set_yticklabels(['',
+                    seconds_to_display(time_max * .5),
+                    seconds_to_display(time_max),
+                    ], fontsize=6)
+            # ax.set_xticks(x, names_display, rotation='vertical')
+            ax.tick_params(
+                    axis='x',
+                    which='both',
+                    bottom=False,
+                    top=False,
+                    labelbottom=False,
+                    )
+
+    fig.set_size_inches(9, 4) # width, height
+    fig.legend(post, names_display, loc='center right', fontsize=6)
+    # horizontal, vertical
+    fig.text(.05, .96, f'arg_equal_1d() Performance: {NUMBER} Iterations', fontsize=10)
+    fig.text(.05, .90, get_versions(), fontsize=6)
+
+    fp = '/tmp/arg_equal_1d.png'
+    plt.subplots_adjust(
+            left=0.075,
+            bottom=0.05,
+            right=0.80,
+            top=0.85,
+            wspace=0.9, # width
+            hspace=0.2,
+            )
+    # plt.rcParams.update({'font.size': 22})
+    plt.savefig(fp, dpi=300)
+
+    if sys.platform.startswith('linux'):
+        os.system(f'eog {fp}&')
+    else:
+        os.system(f'open {fp}')
+
+
+#-------------------------------------------------------------------------------
+
+class FixtureFactory:
+    NAME = ''
+
+    @staticmethod
+    def get_array(size: int) -> np.ndarray:
+        return np.arange(size)
+
+    def _get_array_filled(
+            size: int,
+            start_third: int, #0, 1 or 2
+            density: float, # less than 1
+            ) -> np.ndarray:
+        a = FixtureFactory.get_array(size)
+        count = size * density
+        start = int(len(a) * (start_third/3))
+        length = len(a) - start
+        step = max(int(length / count), 1)
+        fill = np.arange(start, len(a), step)
+        a[fill] = -1
+        return a
+
+    @classmethod
+    def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]:
+        array = cls.get_array(size)
+        return cls.NAME, array
+
+    DENSITY_TO_DISPLAY = {
+        'single': '1 Match',
+        'quarter': '25% Match',
+        'half': '50% Match',
+        'full': '100% Match',
+    }
+
+
+
+class FFSingle(FixtureFactory):
+    NAME = 'single'
+
+    @staticmethod
+    def get_array(size: int) -> np.ndarray:
+        a = FixtureFactory.get_array(size)
+        a[len(a) // 2] = -1
+        return a
+
+class FFQuarter(FixtureFactory):
+    NAME = 'quarter'
+
+    @classmethod
+    def get_array(cls, size: int) -> np.ndarray:
+        return cls._get_array_filled(size, start_third=0, density=0.25)
+
+class FFHalf(FixtureFactory):
+    NAME = 'half'
+
+    @classmethod
+    def get_array(cls, size: int) -> np.ndarray:
+        return cls._get_array_filled(size, start_third=0, density=0.5)
+
+
+class FFFull(FixtureFactory):
+    NAME = 'full'
+
+    @classmethod
+    def get_array(cls, size: int) -> np.ndarray:
+        return cls._get_array_filled(size, start_third=0, density=1)
+
+
+def get_versions() -> str:
+    import platform
+    return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n'
+
+
+CLS_PROCESSOR = (
+    AKArgEqual,
+    NPEq,
+    NPEqNonZero,
+    )
+
+CLS_FF = (
+    FFSingle,
+    FFQuarter,
+    FFHalf,
+    FFFull,
+)
+
+
+def run_test():
+    records = []
+    for size in (10_000, 100_000, 1_000_000):
+        for ff in CLS_FF:
+            fixture_label, fixture = ff.get_label_array(size)
+            for cls in CLS_PROCESSOR:
+                runner = cls(fixture)
+
+                record = [cls, NUMBER, fixture_label, size]
+                print(record)
+                try:
+                    result = timeit.timeit(
+                            f'runner()',
+                            globals=locals(),
+                            number=NUMBER)
+                except OSError:
+                    result = np.nan
+                finally:
+                    pass
+                record.append(result)
+                records.append(record)
+
+    f = pd.DataFrame.from_records(records,
+            columns=('cls_processor', 'number', 'fixture', 'size', 'time')
+            )
+    print(f)
+    plot_performance(f)
+
+if __name__ == '__main__':
+
+    run_test()
+
+
+
diff --git a/src/_arraykit.c b/src/_arraykit.c
index cbb036b4..0498e2d2 100644
--- a/src/_arraykit.c
+++ b/src/_arraykit.c
@@ -3535,6 +3535,19 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) {
 //------------------------------------------------------------------------------
 // general utility
 
+static npy_uint32
+AK_next_power(npy_uint32 v) {
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+
 #define NONZERO_APPEND_INDEX_RELATIVE {                                      \
     if (AK_UNLIKELY(count == capacity)) {                                    \
         capacity <<= 1;                                                      \
@@ -3572,7 +3585,7 @@ AK_nonzero_1d(PyArrayObject* array) {
 
     Py_ssize_t count = 0;
     // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
-    Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8;
+    Py_ssize_t capacity = count_max < 512 ? 512 : AK_next_power((npy_uint32)(count_max / 8));
     npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity);
 
     NPY_BEGIN_THREADS_DEF;
@@ -3680,7 +3693,7 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) {
 
 //------------------------------------------------------------------------------
 
-static npy_int64
+static inline npy_int64
 AK_obj_to_int(PyObject* obj, bool* error) {
     npy_int64 v = 0;
     *error = false;
@@ -3773,30 +3786,67 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) {
     // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem
     Py_ssize_t count = 0;
     // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size
-    Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8;
+    Py_ssize_t capacity = count_max < 1024 ? 1024 : AK_next_power((npy_uint32)(count_max / 8));
     npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity);
 
+    NpyIter *iter = NpyIter_New(
+            array,                                      // array
+            NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP, // iter flags
+            NPY_KEEPORDER,                              // order
+            NPY_NO_CASTING,                             // casting
+            NULL                                        // dtype
+            );
+    if (iter == NULL) {
+        free(indices);
+        return NULL;
+    }
+    NpyIter_IterNextFunc *iter_next = NpyIter_GetIterNext(iter, NULL);
+    if (iter_next == NULL) {
+        free(indices);
+        NpyIter_Deallocate(iter);
+        return NULL;
+    }
+    char **data_ptr = NpyIter_GetDataPtrArray(iter);
+    char* data;
+    npy_intp *stride_ptr = NpyIter_GetInnerStrideArray(iter);
+    npy_intp stride;
+    npy_intp *inner_size_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+    npy_intp inner_size;
+    npy_int64 i = 0;
+
     switch (PyArray_TYPE(array)) { // type of passed in array
         case NPY_INT64: {
             // try to convert the object to an int; if not possible, no matches
             npy_int64 v = AK_obj_to_int(value, &error);
             if (error) {
+                free(indices);
+                NpyIter_Deallocate(iter);
                 goto empty;
             }
-            for (npy_intp i = 0; i < count_max; i++) {
-                if (*(npy_int64*)PyArray_GETPTR1(array, i) == v) {
-                    if (AK_UNLIKELY(count == capacity)) {
-                        capacity <<= 1;
-                        indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
-                        if (indices == NULL) {
-                            return NULL;
+
+            do {
+                data = *data_ptr;
+                stride = *stride_ptr;
+                inner_size = *inner_size_ptr;
+                while (inner_size--) {
+                    if (*(npy_int64*)data == v) {
+                        if (AK_UNLIKELY(count == capacity)) {
+                            capacity <<= 1;
+                            indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);
+                            if (indices == NULL) {
+                                NpyIter_Deallocate(iter);
+                                return NULL;
+                            }
                         }
+                        indices[count++] = i;
                     }
-                    indices[count++] = i;
+                    i++;
+                    data += stride;
                 }
-            }
+            } while(iter_next(iter));
         }
     }
+    NpyIter_Deallocate(iter);
     if (count == 0) {
         free(indices);
         goto empty;
@@ -3837,7 +3887,6 @@ arg_equal_1d(PyObject *Py_UNUSED(m), PyObject *args) {
 }
 
 
-
 //------------------------------------------------------------------------------
 
 static char *first_true_1d_kwarg_names[] = {
diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py
index 8838ac44..20b898a6 100644
--- a/test/test_arg_equal_1d.py
+++ b/test/test_arg_equal_1d.py
@@ -41,7 +41,7 @@ def test_arg_equal_1d_int_e(self) -> None:
         a = np.array([4, 0, 4, 0, 5, 8, 0])
         self.assertEqual(arg_equal_1d(a, False).tolist(), [1, 3, 6])
 
-    def test_arg_equal_1d_int_e(self) -> None:
+    def test_arg_equal_1d_int_f(self) -> None:
         # NOTE: this is consistent with numpy
         a = np.array([4, 0, 4, 0, 5, 8, 1])
         self.assertEqual(arg_equal_1d(a, True).tolist(), [6])