From bfd47bab83c9045a61a79b8b4b3641c469569c5e Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Tue, 11 Jun 2024 15:19:41 +0200 Subject: [PATCH] Reduce memory consumption --- lap.h | 14 +++++++------- python.cc | 28 +++++++++++++++++++--------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/lap.h b/lap.h index d32523f..e4ea0a6 100644 --- a/lap.h +++ b/lap.h @@ -228,15 +228,14 @@ find_umins( /// @param u out dual variables, row reduction numbers / size dim /// @param v out dual variables, column reduction numbers / size dim /// @return achieved minimum assignment cost -template -cost lap(int dim, const cost *restrict assign_cost, bool verbose, +template +cost lap(int dim, const cost *restrict assign_cost, idx *restrict rowsol, idx *restrict colsol, cost *restrict u, cost *restrict v) { - auto free = std::unique_ptr(new idx[dim]); // list of unassigned rows. - auto collist = std::unique_ptr(new idx[dim]); // list of columns to be scanned in various ways. - auto matches = std::unique_ptr(new idx[dim]); // counts how many times a row could be assigned. - auto d = std::unique_ptr(new cost[dim]); // 'cost-distance' in augmenting path calculation. - auto pred = std::unique_ptr(new idx[dim]); // row-predecessor of column in augmenting/alternating path. + auto collist = std::make_unique(dim); // list of columns to be scanned in various ways. + auto matches = std::make_unique(dim); // counts how many times a row could be assigned. + auto d = std::make_unique(dim); // 'cost-distance' in augmenting path calculation. + auto pred = std::make_unique(dim); // row-predecessor of column in augmenting/alternating path. // init how many times a row will be assigned in the column reduction. #if _OPENMP >= 201307 @@ -273,6 +272,7 @@ cost lap(int dim, const cost *restrict assign_cost, bool verbose, } // REDUCTION TRANSFER + auto free = matches.get(); // list of unassigned rows. idx numfree = 0; for (idx i = 0; i < dim; i++) { const cost *local_cost = &assign_cost[i * dim]; diff --git a/python.cc b/python.cc index 96b780b..1702a6d 100644 --- a/python.cc +++ b/python.cc @@ -64,7 +64,8 @@ using pyobj = _pyobj; using pyarray = _pyobj; template -static always_inline double call_lap(int dim, const void *restrict cost_matrix, bool verbose, +static always_inline double call_lap(int dim, const void *restrict cost_matrix, + bool verbose, bool disable_avx, int *restrict row_ind, int *restrict col_ind, void *restrict u, void *restrict v) { double lapcost; @@ -76,10 +77,18 @@ static always_inline double call_lap(int dim, const void *restrict cost_matrix, auto cost_matrix_typed = reinterpret_cast(cost_matrix); auto u_typed = reinterpret_cast(u); auto v_typed = reinterpret_cast(v); - if (hasAVX2) { - lapcost = lap(dim, cost_matrix_typed, verbose, row_ind, col_ind, u_typed, v_typed); + if (hasAVX2 && !disable_avx) { + if (verbose) { + lapcost = lap(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); + } else { + lapcost = lap(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); + } } else { - lapcost = lap(dim, cost_matrix_typed, verbose, row_ind, col_ind, u_typed, v_typed); + if (verbose) { + lapcost = lap(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); + } else { + lapcost = lap(dim, cost_matrix_typed, row_ind, col_ind, u_typed, v_typed); + } } Py_END_ALLOW_THREADS return lapcost; @@ -88,12 +97,13 @@ static always_inline double call_lap(int dim, const void *restrict cost_matrix, static PyObject *py_lapjv(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *cost_matrix_obj; int verbose = 0; + int disable_avx = 0; int force_doubles = 0; static const char *kwlist[] = { - "cost_matrix", "verbose", "force_doubles", NULL}; + "cost_matrix", "verbose", "disable_avx", "force_doubles", NULL}; if (!PyArg_ParseTupleAndKeywords( - args, kwargs, "O|pb", const_cast(kwlist), - &cost_matrix_obj, &verbose, &force_doubles)) { + args, kwargs, "O|pbb", const_cast(kwlist), + &cost_matrix_obj, &verbose, &disable_avx, &force_doubles)) { return NULL; } pyarray cost_matrix_array; @@ -144,9 +154,9 @@ static PyObject *py_lapjv(PyObject *self, PyObject *args, PyObject *kwargs) { auto u = PyArray_DATA(u_array.get()); auto v = PyArray_DATA(v_array.get()); if (float32) { - lapcost = call_lap(dim, cost_matrix, verbose, row_ind, col_ind, u, v); + lapcost = call_lap(dim, cost_matrix, verbose, disable_avx, row_ind, col_ind, u, v); } else { - lapcost = call_lap(dim, cost_matrix, verbose, row_ind, col_ind, u, v); + lapcost = call_lap(dim, cost_matrix, verbose, disable_avx, row_ind, col_ind, u, v); } return Py_BuildValue("(OO(dOO))", row_ind_array.get(), col_ind_array.get(), lapcost,