From 37112839de1647f2da9ad02e7175fdd45f182a87 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 10:35:30 -0700 Subject: [PATCH 1/6] preliminary setup for arg_equal_1d --- src/__init__.py | 1 + src/__init__.pyi | 1 + src/_arraykit.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/__init__.py b/src/__init__.py index b88ea42c..62d2af5a 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -29,3 +29,4 @@ from ._arraykit import first_true_2d as first_true_2d from ._arraykit import slice_to_ascending_slice as slice_to_ascending_slice from ._arraykit import nonzero_1d as nonzero_1d +from ._arraykit import arg_equal_1d as arg_equal_1d diff --git a/src/__init__.pyi b/src/__init__.pyi index 16805f91..34088ae2 100644 --- a/src/__init__.pyi +++ b/src/__init__.pyi @@ -160,4 +160,5 @@ def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ... def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ... def nonzero_1d(__array: np.ndarray, /) -> np.ndarray: ... +def arg_equal_1d(__array: np.ndarray, __value: tp.Any, /) -> np.ndarray: ... def slice_to_ascending_slice(__slice: slice, __size: int) -> slice: ... 
diff --git a/src/_arraykit.c b/src/_arraykit.c index e5947966..c6f4c77f 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3672,6 +3672,36 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { } +//------------------------------------------------------------------------------ + +static inline PyObject* +AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { + Py_RETURN_NONE; +} + + +static PyObject* +arg_equal_1d(PyObject *Py_UNUSED(m), PyObject *args) { + PyArrayObject* array; + PyObject* value; + if (!PyArg_ParseTuple(args, + "O!O:arg_equal_1d", + &PyArray_Type, &array, + &value)) { + return NULL; + } + + if (PyArray_NDIM(array) != 1) { + PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional"); + return NULL; + } + return AK_arg_equal_1d(array, value); +} + + + +//------------------------------------------------------------------------------ + static char *first_true_1d_kwarg_names[] = { "array", "forward", @@ -7310,6 +7340,7 @@ static PyMethodDef arraykit_methods[] = { NULL}, {"count_iteration", count_iteration, METH_O, NULL}, {"nonzero_1d", nonzero_1d, METH_O, NULL}, + {"arg_equal_1d", arg_equal_1d, METH_VARARGS, NULL}, {"isna_element", (PyCFunction)isna_element, METH_VARARGS | METH_KEYWORDS, From 34a2153759babc9077298e20b8bc1dc9957f9f43 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 10:44:04 -0700 Subject: [PATCH 2/6] preliminary interface tests --- test/test_arg_equal_1d.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 test/test_arg_equal_1d.py diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py new file mode 100644 index 00000000..322155a6 --- /dev/null +++ b/test/test_arg_equal_1d.py @@ -0,0 +1,14 @@ +import unittest +import numpy as np + +from arraykit import arg_equal_1d + +class TestUnit(unittest.TestCase): + + def test_arg_equal_1d_a1(self) -> None: + a = np.arange(6).reshape(2, 3) + with self.assertRaises(TypeError): + arg_equal_1d(a) + + with self.assertRaises(ValueError): 
+ arg_equal_1d(a, None) From 48d257d9ddf7a7a377e8df9508b835177d03c7cd Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 11:52:25 -0700 Subject: [PATCH 3/6] preliminary sketch --- src/_arraykit.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index c6f4c77f..1e98d867 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3674,9 +3674,65 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { //------------------------------------------------------------------------------ +#define NONZERO_APPEND_INDEX_RELATIVE { \ + if (AK_UNLIKELY(count == capacity)) { \ + capacity <<= 1; \ + indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\ + if (indices == NULL) { \ + return NULL; \ + } \ + } \ + indices[count++] = p - p_start; \ +} \ + + + static inline PyObject* AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { - Py_RETURN_NONE; + PyObject* final; + npy_intp count_max = PyArray_SIZE(array); + if (count_max == 0) { // return empty array + npy_intp dims = {count_max}; + final = PyArray_SimpleNew(1, &dims, NPY_INT64); + PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); + return final; + } + + // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem + Py_ssize_t count = 0; + // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size + Py_ssize_t capacity = count_max < 1024 ? 
count_max : count_max / 8; + npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity); + + switch (PyArray_TYPE(array)) { // type of passed in array + case NPY_INT64: { + npy_intp i = 0; // position within Boolean array + for (npy_intp i = 0; i < count_max; i++) { + if (*(npy_int64*)PyArray_GETPTR1(array, i) == 0) { + if (AK_UNLIKELY(count == capacity)) { + capacity <<= 1; + indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity); + if (indices == NULL) { + return NULL; + } + } + indices[count++] = i; + } + } + } + } + + npy_intp dims = {count}; + final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices); + if (!final) { + free(indices); + return NULL; + } + // This ensures that the array frees the indices array; this has been tested by calling free(indices) and observing segfault + PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA); + PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); + return final; + } From 652e314c32993215d55c0a3a14d23135345bc8f6 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 13:00:15 -0700 Subject: [PATCH 4/6] improvements to nonzero_1d, arg_equal_1d --- src/_arraykit.c | 137 ++++++++++++++++++++++++++++++-------- test/test_arg_equal_1d.py | 15 +++++ 2 files changed, 124 insertions(+), 28 deletions(-) diff --git a/src/_arraykit.c b/src/_arraykit.c index 1e98d867..77a3eddc 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3562,13 +3562,11 @@ static inline PyObject* AK_nonzero_1d(PyArrayObject* array) { // the maxiumum number of indices we could return is the size of the array; if this is under a certain number, probably better to just allocate that rather than reallocate PyObject* final; + npy_intp dims[1] = {0}; // update later npy_intp count_max = PyArray_SIZE(array); if (count_max == 0) { // return empty array - npy_intp dims = {count_max}; - final = PyArray_SimpleNew(1, &dims, NPY_INT64); - PyArray_CLEARFLAGS((PyArrayObject*)final, 
NPY_ARRAY_WRITEABLE); - return final; + goto empty; } lldiv_t size_div = lldiv((long long)count_max, 8); // quot, rem @@ -3642,8 +3640,12 @@ AK_nonzero_1d(PyArrayObject* array) { } NPY_END_THREADS; - npy_intp dims = {count}; - final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices); + if (count == 0) { + free(indices); + goto empty; + } + dims[0] = count; + final = PyArray_SimpleNewFromData(1, dims, NPY_INT64, (void*)indices); if (!final) { free(indices); return NULL; @@ -3652,6 +3654,10 @@ AK_nonzero_1d(PyArrayObject* array) { PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA); PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); return final; +empty: + final = PyArray_SimpleNew(1, dims, NPY_INT64); + PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); + return final; } #undef NONZERO_APPEND_INDEX_RELATIVE #undef NONZERO_APPEND_INDEX_ABSOLUTE @@ -3674,28 +3680,93 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { //------------------------------------------------------------------------------ -#define NONZERO_APPEND_INDEX_RELATIVE { \ - if (AK_UNLIKELY(count == capacity)) { \ - capacity <<= 1; \ - indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity);\ - if (indices == NULL) { \ - return NULL; \ - } \ - } \ - indices[count++] = p - p_start; \ -} \ - - +static npy_int64 +AK_obj_to_int(PyObject* obj, bool* error) { + npy_int64 v = 0; + *error = false; + if (PyArray_IsScalar(obj, LongLong)) { + v = (npy_int64)PyArrayScalar_VAL(obj, LongLong); + } + else if (PyArray_IsScalar(obj, Long)) { + v = (npy_int64)PyArrayScalar_VAL(obj, Long); + } + else if (PyLong_Check(obj)) { + v = PyLong_AsLongLong(obj); + if (v == -1 && PyErr_Occurred()) { + PyErr_Clear(); + *error = true; + } + } + else if (PyArray_IsScalar(obj, Double)) { + double dv = PyArrayScalar_VAL(obj, Double); + if (floor(dv) != dv) { + *error = true; + } + v = (npy_int64)dv; + } + else if (PyFloat_Check(obj)) { + double dv = PyFloat_AsDouble(obj); 
+ if (dv == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + *error = true; + } + v = (npy_int64)dv; // truncate to integer + if (v != dv) { + *error = true; + } + } + else if (PyArray_IsScalar(obj, ULongLong)) { + v = (npy_int64)PyArrayScalar_VAL(obj, ULongLong); + } + else if (PyArray_IsScalar(obj, ULong)) { + v = (npy_int64)PyArrayScalar_VAL(obj, ULong); + } + else if (PyArray_IsScalar(obj, Int)) { + v = (npy_int64)PyArrayScalar_VAL(obj, Int); + } + else if (PyArray_IsScalar(obj, UInt)) { + v = (npy_int64)PyArrayScalar_VAL(obj, UInt); + } + else if (PyArray_IsScalar(obj, Float)) { + double dv = (double)PyArrayScalar_VAL(obj, Float); + if (floor(dv) != dv) { + *error = true; + } + v = (npy_int64)dv; + } + else if (PyArray_IsScalar(obj, Half)) { + double dv = npy_half_to_double(PyArrayScalar_VAL(obj, Half)); + if (floor(dv) != dv) { + *error = true; + } + v = (npy_int64)dv; + } + else if (PyBool_Check(obj)) { + v = PyObject_IsTrue(obj); + } + else if (PyNumber_Check(obj)) { + // NOTE: we handle PyArray Scalar Byte, Short, UByte, UShort with PyNumber_Check, below, saving four branches here + // NOTE: this returns a Py_ssize_t, which might be 32 bit. This can be used for PyArray_Scalars <= ssize_t. 
+ v = (npy_int64)PyNumber_AsSsize_t(obj, PyExc_OverflowError); + if (v == -1 && PyErr_Occurred()) { + *error = true; + } + } + else { + *error = true; + } + return v; +} static inline PyObject* AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { PyObject* final; + npy_intp dims[1] = {0}; // update later npy_intp count_max = PyArray_SIZE(array); + bool error; + if (count_max == 0) { // return empty array - npy_intp dims = {count_max}; - final = PyArray_SimpleNew(1, &dims, NPY_INT64); - PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); - return final; + goto empty; } // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem @@ -3706,9 +3777,13 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { switch (PyArray_TYPE(array)) { // type of passed in array case NPY_INT64: { - npy_intp i = 0; // position within Boolean array + // try to convert the object to an int; if not possible, no matches + npy_int64 v = AK_obj_to_int(value, &error); + if (error) { + goto empty; + } for (npy_intp i = 0; i < count_max; i++) { - if (*(npy_int64*)PyArray_GETPTR1(array, i) == 0) { + if (*(npy_int64*)PyArray_GETPTR1(array, i) == v) { if (AK_UNLIKELY(count == capacity)) { capacity <<= 1; indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity); @@ -3721,9 +3796,12 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { } } } - - npy_intp dims = {count}; - final = PyArray_SimpleNewFromData(1, &dims, NPY_INT64, (void*)indices); + if (count == 0) { + free(indices); + goto empty; + } + dims[0] = count; + final = PyArray_SimpleNewFromData(1, dims, NPY_INT64, (void*)indices); if (!final) { free(indices); return NULL; @@ -3732,7 +3810,10 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { PyArray_ENABLEFLAGS((PyArrayObject*)final, NPY_ARRAY_OWNDATA); PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); return final; - +empty: + final = PyArray_SimpleNew(1, dims, NPY_INT64); // dims set to 0 + 
PyArray_CLEARFLAGS((PyArrayObject*)final, NPY_ARRAY_WRITEABLE); + return final; } diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py index 322155a6..af4467ad 100644 --- a/test/test_arg_equal_1d.py +++ b/test/test_arg_equal_1d.py @@ -12,3 +12,18 @@ def test_arg_equal_1d_a1(self) -> None: with self.assertRaises(ValueError): arg_equal_1d(a, None) + + + def test_arg_equal_1d_int_a(self) -> None: + a = np.array([4, 0, 4, 0, 5, 8, 0]) + self.assertEqual(arg_equal_1d(a, 0).tolist(), [1, 3, 6]) + self.assertEqual(arg_equal_1d(a, 4).tolist(), [0, 2]) + + def test_arg_equal_1d_int_b(self) -> None: + a = np.arange(100_000) + self.assertEqual(arg_equal_1d(a, a[99_999]).tolist(), [99_999]) + self.assertEqual(arg_equal_1d(a, a[99_999]).tolist(), [99_999]) + + def test_arg_equal_1d_int_c(self) -> None: + a = np.array([4, 0, 4, 0, 5, 8, 0]) + self.assertEqual(arg_equal_1d(a, 20).tolist(), []) From c9d3378139d43a473b5cffa6c413a8e03d051783 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 13:18:43 -0700 Subject: [PATCH 5/6] tests --- src/_arraykit.c | 1 + test/test_arg_equal_1d.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/_arraykit.c b/src/_arraykit.c index 77a3eddc..cbb036b4 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3684,6 +3684,7 @@ static npy_int64 AK_obj_to_int(PyObject* obj, bool* error) { npy_int64 v = 0; *error = false; + if (PyArray_IsScalar(obj, LongLong)) { v = (npy_int64)PyArrayScalar_VAL(obj, LongLong); } diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py index af4467ad..8838ac44 100644 --- a/test/test_arg_equal_1d.py +++ b/test/test_arg_equal_1d.py @@ -27,3 +27,21 @@ def test_arg_equal_1d_int_b(self) -> None: def test_arg_equal_1d_int_c(self) -> None: a = np.array([4, 0, 4, 0, 5, 8, 0]) self.assertEqual(arg_equal_1d(a, 20).tolist(), []) + + def test_arg_equal_1d_int_d(self) -> None: + a = np.array([4, 0, 4, 0, 5, 8, 0]) + self.assertEqual(arg_equal_1d(a, "foo").tolist(), 
[]) + + def test_arg_equal_1d_int_d(self) -> None: + a = np.array([4, 0, 4, 0, 5, 8, 0]) + self.assertEqual(arg_equal_1d(a, None).tolist(), []) + + def test_arg_equal_1d_int_e(self) -> None: + # NOTE: this is consistent with numpy + a = np.array([4, 0, 4, 0, 5, 8, 0]) + self.assertEqual(arg_equal_1d(a, False).tolist(), [1, 3, 6]) + + def test_arg_equal_1d_int_e(self) -> None: + # NOTE: this is consistent with numpy + a = np.array([4, 0, 4, 0, 5, 8, 1]) + self.assertEqual(arg_equal_1d(a, True).tolist(), [6]) From 83a340cffc9b10826f59b4aed6f604e41ad16832 Mon Sep 17 00:00:00 2001 From: Christopher Ariza Date: Thu, 6 Jun 2024 18:03:49 -0700 Subject: [PATCH 6/6] additional sketching --- doc/articles/arg_equal_1d.py | 257 +++++++++++++++++++++++++++++++++++ src/_arraykit.c | 75 ++++++++-- test/test_arg_equal_1d.py | 2 +- 3 files changed, 320 insertions(+), 14 deletions(-) create mode 100644 doc/articles/arg_equal_1d.py diff --git a/doc/articles/arg_equal_1d.py b/doc/articles/arg_equal_1d.py new file mode 100644 index 00000000..123024a2 --- /dev/null +++ b/doc/articles/arg_equal_1d.py @@ -0,0 +1,257 @@ + + + +import os +import sys +import timeit +import typing as tp + +from arraykit import arg_equal_1d +import arraykit as ak + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +sys.path.append(os.getcwd()) + + + +class ArrayProcessor: + NAME = '' + SORT = -1 + + def __init__(self, array: np.ndarray): + self.array = array + +#------------------------------------------------------------------------------- +class AKArgEqual(ArrayProcessor): + NAME = 'ak.arg_equal_1d()' + SORT = 0 + + def __call__(self): + _ = arg_equal_1d(self.array, -1) + +class NPEq(ArrayProcessor): + NAME = 'np.__eq__()' + SORT = 1 + + def __call__(self): + _ = self.array == -1 + +class NPEqNonZero(ArrayProcessor): + NAME = 'np.nonzero(np.__eq__())[0]' + SORT = 3 + + def __call__(self): + e = self.array == -1 + _ = np.nonzero(e)[0] + + 
+#------------------------------------------------------------------------------- +NUMBER = 200 + +def seconds_to_display(seconds: float) -> str: + seconds /= NUMBER + if seconds < 1e-4: + return f'{seconds * 1e6: .1f} (µs)' + if seconds < 1e-1: + return f'{seconds * 1e3: .1f} (ms)' + return f'{seconds: .1f} (s)' + + +def plot_performance(frame): + fixture_total = len(frame['fixture'].unique()) + cat_total = len(frame['size'].unique()) + processor_total = len(frame['cls_processor'].unique()) + fig, axes = plt.subplots(cat_total, fixture_total) + + # cmap = plt.get_cmap('terrain') + cmap = plt.get_cmap('plasma') + + color = cmap(np.arange(processor_total) / max(processor_total, 3)) + + # category is the size of the array + for cat_count, (cat_label, cat) in enumerate(frame.groupby('size')): + # each fixture is a collection of tests for one display + fixtures = {fixture_label: fixture for fixture_label, fixture in cat.groupby('fixture')} + for fixture_count, (fixture_label, fixture) in enumerate(fixtures.items()): + ax = axes[cat_count][fixture_count] + + # set order + fixture['sort'] = [f.SORT for f in fixture['cls_processor']] + fixture = fixture.sort_values('sort') + + results = fixture['time'].values.tolist() + names = [cls.NAME for cls in fixture['cls_processor']] + # x = np.arange(len(results)) + names_display = names + post = ax.bar(names_display, results, color=color) + + # density, position = fixture_label.split('-') + # cat_label is the size of the array + title = f'{cat_label:.0e}\n{FixtureFactory.DENSITY_TO_DISPLAY[fixture_label]}' + + ax.set_title(title, fontsize=6) + ax.set_box_aspect(0.75) # makes taller tan wide + time_max = fixture['time'].max() + ax.set_yticks([0, time_max * 0.5, time_max]) + ax.set_yticklabels(['', + seconds_to_display(time_max * .5), + seconds_to_display(time_max), + ], fontsize=6) + # ax.set_xticks(x, names_display, rotation='vertical') + ax.tick_params( + axis='x', + which='both', + bottom=False, + top=False, + 
labelbottom=False, + ) + + fig.set_size_inches(9, 4) # width, height + fig.legend(post, names_display, loc='center right', fontsize=6) + # horizontal, vertical + fig.text(.05, .96, f'arg_equal_1d() Performance: {NUMBER} Iterations', fontsize=10) + fig.text(.05, .90, get_versions(), fontsize=6) + + fp = '/tmp/arg_equal_1d.png' + plt.subplots_adjust( + left=0.075, + bottom=0.05, + right=0.80, + top=0.85, + wspace=0.9, # width + hspace=0.2, + ) + # plt.rcParams.update({'font.size': 22}) + plt.savefig(fp, dpi=300) + + if sys.platform.startswith('linux'): + os.system(f'eog {fp}&') + else: + os.system(f'open {fp}') + + +#------------------------------------------------------------------------------- + +class FixtureFactory: + NAME = '' + + @staticmethod + def get_array(size: int) -> np.ndarray: + return np.arange(size) + + def _get_array_filled( + size: int, + start_third: int, #0, 1 or 2 + density: float, # less than 1 + ) -> np.ndarray: + a = FixtureFactory.get_array(size) + count = size * density + start = int(len(a) * (start_third/3)) + length = len(a) - start + step = max(int(length / count), 1) + fill = np.arange(start, len(a), step) + a[fill] = -1 + return a + + @classmethod + def get_label_array(cls, size: int) -> tp.Tuple[str, np.ndarray]: + array = cls.get_array(size) + return cls.NAME, array + + DENSITY_TO_DISPLAY = { + 'single': '1 Match', + 'quarter': '25% Match', + 'half': '50% Match', + 'full': '100% Match', + } + + + +class FFSingle(FixtureFactory): + NAME = 'single' + + @staticmethod + def get_array(size: int) -> np.ndarray: + a = FixtureFactory.get_array(size) + a[len(a) // 2] = -1 + return a + +class FFQuarter(FixtureFactory): + NAME = 'quarter' + + @classmethod + def get_array(cls, size: int) -> np.ndarray: + return cls._get_array_filled(size, start_third=0, density=0.25) + +class FFHalf(FixtureFactory): + NAME = 'half' + + @classmethod + def get_array(cls, size: int) -> np.ndarray: + return cls._get_array_filled(size, start_third=0, density=0.5) + + 
+class FFFull(FixtureFactory): + NAME = 'full' + + @classmethod + def get_array(cls, size: int) -> np.ndarray: + return cls._get_array_filled(size, start_third=0, density=1) + + +def get_versions() -> str: + import platform + return f'OS: {platform.system()} / ArrayKit: {ak.__version__} / NumPy: {np.__version__}\n' + + +CLS_PROCESSOR = ( + AKArgEqual, + NPEq, + NPEqNonZero, + ) + +CLS_FF = ( + FFSingle, + FFQuarter, + FFHalf, + FFFull, +) + + +def run_test(): + records = [] + for size in (10_000, 100_000, 1_000_000): + for ff in CLS_FF: + fixture_label, fixture = ff.get_label_array(size) + for cls in CLS_PROCESSOR: + runner = cls(fixture) + + record = [cls, NUMBER, fixture_label, size] + print(record) + try: + result = timeit.timeit( + f'runner()', + globals=locals(), + number=NUMBER) + except OSError: + result = np.nan + finally: + pass + record.append(result) + records.append(record) + + f = pd.DataFrame.from_records(records, + columns=('cls_processor', 'number', 'fixture', 'size', 'time') + ) + print(f) + plot_performance(f) + +if __name__ == '__main__': + + run_test() + + + diff --git a/src/_arraykit.c b/src/_arraykit.c index cbb036b4..0498e2d2 100644 --- a/src/_arraykit.c +++ b/src/_arraykit.c @@ -3535,6 +3535,19 @@ resolve_dtype_iter(PyObject *Py_UNUSED(m), PyObject *arg) { //------------------------------------------------------------------------------ // general utility +static npy_uint32 +AK_next_power(npy_uint32 v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + + #define NONZERO_APPEND_INDEX_RELATIVE { \ if (AK_UNLIKELY(count == capacity)) { \ capacity <<= 1; \ @@ -3572,7 +3585,7 @@ AK_nonzero_1d(PyArrayObject* array) { Py_ssize_t count = 0; // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size - Py_ssize_t capacity = count_max < 1024 ? 
count_max : count_max / 8; + Py_ssize_t capacity = count_max < 512 ? 512 : AK_next_power((npy_uint32)(count_max / 8)); npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity); NPY_BEGIN_THREADS_DEF; @@ -3680,7 +3693,7 @@ nonzero_1d(PyObject *Py_UNUSED(m), PyObject *a) { //------------------------------------------------------------------------------ -static npy_int64 +static inline npy_int64 AK_obj_to_int(PyObject* obj, bool* error) { npy_int64 v = 0; *error = false; @@ -3773,30 +3786,67 @@ AK_arg_equal_1d(PyArrayObject* array, PyObject* value) { // lldiv_t size_div = lldiv((long long)size, 8); // quot, rem Py_ssize_t count = 0; // the maximum number of collected integers is equal to or less than count_max; for small count_max, we can just set that value; for large size, we set it to half the size - Py_ssize_t capacity = count_max < 1024 ? count_max : count_max / 8; + Py_ssize_t capacity = count_max < 1024 ? 1024 : AK_next_power((npy_uint32)(count_max / 8)); npy_int64* indices = (npy_int64*)malloc(sizeof(npy_int64) * capacity); + NpyIter *iter = NpyIter_New( + array, // array + NPY_ITER_READONLY | NPY_ITER_EXTERNAL_LOOP, // iter flags + NPY_KEEPORDER, // order + NPY_NO_CASTING, // casting + NULL // dtype + ); + if (iter == NULL) { + free(indices); + return NULL; + } + NpyIter_IterNextFunc *iter_next = NpyIter_GetIterNext(iter, NULL); + if (iter_next == NULL) { + free(indices); + NpyIter_Deallocate(iter); + return NULL; + } + char **data_ptr = NpyIter_GetDataPtrArray(iter); + char* data; + npy_intp *stride_ptr = NpyIter_GetInnerStrideArray(iter); + npy_intp stride; + npy_intp *inner_size_ptr = NpyIter_GetInnerLoopSizePtr(iter); + npy_intp inner_size; + npy_int64 i = 0; + switch (PyArray_TYPE(array)) { // type of passed in array case NPY_INT64: { // try to convert the object to an int; if not possible, no matches npy_int64 v = AK_obj_to_int(value, &error); if (error) { + free(indices); + NpyIter_Deallocate(iter); goto empty; } - for (npy_intp i = 0; i < 
count_max; i++) { - if (*(npy_int64*)PyArray_GETPTR1(array, i) == v) { - if (AK_UNLIKELY(count == capacity)) { - capacity <<= 1; - indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity); - if (indices == NULL) { - return NULL; + + do { + data = *data_ptr; + stride = *stride_ptr; + inner_size = *inner_size_ptr; + while (inner_size--) { + if (*(npy_int64*)data == v) { + if (AK_UNLIKELY(count == capacity)) { + capacity <<= 1; + indices = (npy_int64*)realloc(indices, sizeof(npy_int64) * capacity); + if (indices == NULL) { + NpyIter_Deallocate(iter); + return NULL; + } } + indices[count++] = i; } - indices[count++] = i; + i++; + data += stride; } - } + } while(iter_next(iter)); } } + NpyIter_Deallocate(iter); if (count == 0) { free(indices); goto empty; @@ -3837,7 +3887,6 @@ arg_equal_1d(PyObject *Py_UNUSED(m), PyObject *args) { } - //------------------------------------------------------------------------------ static char *first_true_1d_kwarg_names[] = { diff --git a/test/test_arg_equal_1d.py b/test/test_arg_equal_1d.py index 8838ac44..20b898a6 100644 --- a/test/test_arg_equal_1d.py +++ b/test/test_arg_equal_1d.py @@ -41,7 +41,7 @@ def test_arg_equal_1d_int_e(self) -> None: a = np.array([4, 0, 4, 0, 5, 8, 0]) self.assertEqual(arg_equal_1d(a, False).tolist(), [1, 3, 6]) - def test_arg_equal_1d_int_e(self) -> None: + def test_arg_equal_1d_int_f(self) -> None: # NOTE: this is consistent with numpy a = np.array([4, 0, 4, 0, 5, 8, 1]) self.assertEqual(arg_equal_1d(a, True).tolist(), [6])