diff --git a/implicit/cpu/_als.pyx b/implicit/cpu/_als.pyx
index 03ea1eb..439fe8c 100644
--- a/implicit/cpu/_als.pyx
+++ b/implicit/cpu/_als.pyx
@@ -247,6 +247,7 @@ def _least_squares_cg(integral[:] indptr, integral[:] indices, float[:] data,
 
 
 def calculate_loss(Cui, X, Y, regularization, num_threads=0):
+    """ Calculates the loss for an ALS model """
     return _calculate_loss(Cui, Cui.indptr, Cui.indices, Cui.data.astype('float32'),
                            X, Y, regularization, num_threads)
 
diff --git a/implicit/cpu/als.py b/implicit/cpu/als.py
index 6f4e2a5..2501dce 100644
--- a/implicit/cpu/als.py
+++ b/implicit/cpu/als.py
@@ -556,3 +556,6 @@ def least_squares_cg(Cui, X, Y, regularization, num_threads=0, cg_steps=3):
             rsold = rsnew
 
         X[u] = x
+
+
+calculate_loss = _als.calculate_loss
diff --git a/implicit/gpu/als.cu b/implicit/gpu/als.cu
index 7e1a2fc..58ad047 100644
--- a/implicit/gpu/als.cu
+++ b/implicit/gpu/als.cu
@@ -256,7 +256,7 @@ float LeastSquaresSolver::calculate_loss(const CSRMatrix &Cui, const Matrix &X,
   size_t item_count = Y.rows, factors = Y.cols, user_count = X.rows;
 
   Matrix YtY(factors, factors, NULL);
-  calculate_yty(Y, &YtY, regularization);
+  calculate_yty(Y, &YtY, 0.0);
 
   float temp[2] = {0, 0};
   Matrix output(2, 1, temp);
@@ -276,7 +276,8 @@ float LeastSquaresSolver::calculate_loss(const CSRMatrix &Cui, const Matrix &X,
   CHECK_CUDA(cudaDeviceSynchronize());
   output.to_host(temp);
 
-  return temp[0] / (temp[1] + Cui.rows * Cui.cols - Cui.nonzeros);
+  size_t rows = Cui.rows, cols = Cui.cols;
+  return temp[0] / (temp[1] + rows * cols - Cui.nonzeros);
 }
 
 LeastSquaresSolver::~LeastSquaresSolver() {
diff --git a/implicit/gpu/als.py b/implicit/gpu/als.py
index f628b90..7b6791c 100644
--- a/implicit/gpu/als.py
+++ b/implicit/gpu/als.py
@@ -314,3 +314,17 @@ def __setstate__(self, state):
             self._XtX = implicit.gpu.Matrix(self._XtX)
         if self._YtY is not None:
             self._YtY = implicit.gpu.Matrix(self._YtY)
+
+
+def calculate_loss(Cui, X, Y, regularization, solver=None):
+    """Calculates the loss for an ALS model"""
+    if not isinstance(Cui, implicit.gpu.CSRMatrix):
+        Cui = implicit.gpu.CSRMatrix(Cui)
+    if not isinstance(X, implicit.gpu.Matrix):
+        X = implicit.gpu.Matrix(X)
+    if not isinstance(Y, implicit.gpu.Matrix):
+        Y = implicit.gpu.Matrix(Y)
+    if solver is None:
+        solver = implicit.gpu.LeastSquaresSolver()
+
+    return solver.calculate_loss(Cui, X, Y, regularization)
diff --git a/tests/als_test.py b/tests/als_test.py
index 32c63c3..a29c105 100644
--- a/tests/als_test.py
+++ b/tests/als_test.py
@@ -301,6 +301,56 @@ def test_incremental_retrain(use_gpu):
     assert set(ids) == {1, 100, 101}
 
 
+@pytest.mark.parametrize("use_gpu", [True, False] if HAS_CUDA else [False])
+def test_calculate_loss_simple(use_gpu):
+    if use_gpu:
+        calculate_loss = implicit.gpu.als.calculate_loss
+
+    else:
+        calculate_loss = implicit.cpu.als.calculate_loss
+
+    # the only user has liked item 0, but not interacted with item 1
+    n_users, n_items = 1, 2
+    ratings = coo_matrix(([1.0], ([0], [0])), shape=(n_users, n_items)).tocsr()
+
+    # factors are designed to be perfectly wrong, to test loss function
+    item_factors = np.array([[0.0], [1.0]], dtype="float32")
+    user_factors = np.array([[1.0]], dtype="float32")
+
+    loss = calculate_loss(ratings, user_factors, item_factors, regularization=0)
+    assert loss == pytest.approx(1.0)
+
+    loss = calculate_loss(ratings, user_factors, item_factors, regularization=1.0)
+    assert loss == pytest.approx(2.0)
+
+
+@pytest.mark.skipif(not implicit.gpu.HAS_CUDA, reason="needs cuda build")
+@pytest.mark.parametrize("n_users", [2**13, 2**19])
+@pytest.mark.parametrize("n_items", [2**19])
+@pytest.mark.parametrize("n_samples", [2**20])
+@pytest.mark.parametrize("regularization", [0.0, 1.0, 500000.0])
+def test_gpu_loss(n_users, n_items, n_samples, regularization):
+    # we used to have some errors in the gpu loss function
+    # if n_items * n_users >2**31. Test out that the loss on the gpu
+    # matches that on the cpu
+    # https://github.com/benfred/implicit/issues/441
+    # https://github.com/benfred/implicit/issues/367
+    liked_items = np.random.randint(0, n_items, n_samples)
+    liked_users = np.random.randint(0, n_users, n_samples)
+    ratings = coo_matrix(
+        (np.ones(n_samples), (liked_users, liked_items)), shape=(n_users, n_items)
+    ).tocsr()
+
+    factors = 32
+    item_factors = np.random.random((n_items, factors)).astype("float32")
+    user_factors = np.random.random((n_users, factors)).astype("float32")
+
+    gpu_loss = implicit.gpu.als.calculate_loss(ratings, user_factors, item_factors, regularization)
+    cpu_loss = implicit.cpu.als.calculate_loss(ratings, user_factors, item_factors, regularization)
+
+    assert gpu_loss == pytest.approx(cpu_loss, rel=1e-5)
+
+
 def test_calculate_loss_segfault():
     # this code used to segfault, because of a bug in calculate_loss
     factors = 1
@@ -311,5 +361,5 @@ def test_calculate_loss_segfault():
     user_factors = np.random.random((n_users, factors)).astype("float32")
     c_ui = coo_matrix(([1.0, 1.0], ([0, 1], [0, 1])), shape=(n_users, n_items)).tocsr()
 
-    loss = implicit.cpu._als.calculate_loss(c_ui, user_factors, item_factors, regularization)
+    loss = implicit.cpu.als.calculate_loss(c_ui, user_factors, item_factors, regularization)
     assert loss > 0