
Commit

A few more fixes:
* freeing memory
* copy X, theta back to host
* separate method to get factorization score
* pytest
sh1ng committed Mar 9, 2019
1 parent 6b562fb commit a1607de
Showing 8 changed files with 275 additions and 90 deletions.
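
The "pytest" item in the commit message refers to a test added alongside these changes; the test file itself is not part of the excerpt shown on this page. The sketch below only illustrates how the updated FactorizationH2O API (see the factorization.py diff further down) could be exercised from pytest: the import path, matrix sizes and hyperparameters are assumptions, and running it requires a CUDA-capable GPU build of h2o4gpu.

# Illustrative pytest-style check, not the test shipped with this commit.
import numpy as np
import scipy.sparse

from h2o4gpu.solvers.factorization import FactorizationH2O  # assumed import path


def test_factorization_fit_predict():
    m, n, f = 50, 40, 10  # f must be a multiple of 10 (asserted in __init__)
    rng = np.random.RandomState(0)
    X = scipy.sparse.random(m, n, density=0.2, format='csc',
                            dtype=np.float32, random_state=rng)
    X_test = scipy.sparse.random(m, n, density=0.05, format='coo',
                                 dtype=np.float32, random_state=rng)

    scores = []
    model = FactorizationH2O(f, 0.01, max_iter=5)
    model.fit(X, X_test=X_test, scores=scores)

    # this commit copies the factors back to host, so they are usable here
    assert model.XT.shape == (m, f)
    assert model.thetaT.shape == (n, f)
    assert len(scores) == 5  # one (train, cv) tuple per iteration

    pred = model.predict(X_test)
    assert scipy.sparse.isspmatrix_coo(pred)
    assert pred.shape == X_test.shape
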
6 changes: 1 addition & 5 deletions src/gpu/factorization/als.h
@@ -2222,9 +2222,7 @@ class ALSFactorization
#ifdef DEBUG
printf("update X run %f seconds, gridSize: %d, blockSize %d.\n", seconds() - t0, m, f);
#endif
// cudacall(cudaFree(csrRowIndex));
// cudacall(cudaFree(csrColIndex));
// cudacall(cudaFree(ythetaT));
cudacall(cudaFree(ythetaT));

#ifdef DEBUG
t0 = seconds();
@@ -2363,8 +2361,6 @@ class ALSFactorization
cublasHandle_t handle;
cusparseHandle_t cushandle;
cusparseMatDescr_t descr;
T *ytheta = 0;
T *ythetaT = 0;
T *thetaT;
T *XT;
};
29 changes: 28 additions & 1 deletion src/gpu/factorization/factorization.cu
@@ -2,6 +2,32 @@
#include "cuda_utils.h"
#include "solver/factorization.h"

template <class T> void free_data(T **ptr) {
if (ptr)
CUDACHECK(cudaFree(*ptr));
}

void free_data_float(float **ptr) { free_data<float>(ptr); }

void free_data_double(double **ptr) { free_data<double>(ptr); }

void free_data_int(int **ptr) { free_data<int>(ptr); }

template <class T>
void copy_fecatorization_result(T *dst, const T **src, const int size) {
CUDACHECK(cudaMemcpy(dst, *src, sizeof(T) * size, cudaMemcpyDeviceToHost));
}

void copy_fecatorization_result_float(float *dst, const float **src,
const int size) {
copy_fecatorization_result(dst, src, size);
}

void copy_fecatorization_result_double(double *dst, const double **src,
const int size) {
copy_fecatorization_result(dst, src, size);
}

template <class T>
int make_factorization_data(
const int m, const int n, const int f, const long nnz, const long nnz_test,
@@ -77,7 +103,8 @@ int make_factorization_data(
(size_t)(nnz * sizeof(**csrValDevicePtr)),
cudaMemcpyHostToDevice));

if (cooColIndexTestHostPtr && cooRowIndexTestHostPtr && cooValTestHostPtr) {
if (cooColIndexTestHostPtr && cooRowIndexTestHostPtr && cooValTestHostPtr &&
nnz_test > 0) {
CUDACHECK(cudaMalloc((void **)cooRowIndexTestDevicePtr,
nnz_test * sizeof(**cooRowIndexTestDevicePtr)));
CUDACHECK(cudaMalloc((void **)cooColIndexTestDevicePtr,
12 changes: 12 additions & 0 deletions src/include/solver/factorization.h
@@ -1,5 +1,17 @@
#ifndef SRC_INCLUDE_SOLVER_FACTORIZATION_H

void free_data_float(float **ptr);

void free_data_double(double **ptr);

void free_data_int(int **ptr);

void copy_fecatorization_result_float(float *dst, const float **src,
const int size);

void copy_fecatorization_result_double(double *dst, const double **src,
const int size);

int make_factorization_data_double(
const int m, const int n, const int f, const long nnz, const long nnz_test,
const int *csrRowIndexHostPtr, const int *csrColIndexHostPtr,
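
For reference, the new declarations above are plain C symbols. The project reaches them through its SWIG-generated wrapper (GPUlib in lib_utils, as used in factorization.py below), so the ctypes sketch that follows is purely illustrative of the declared signatures; the shared-library name is a placeholder, not the real build artifact.

# Illustrative ctypes view of the new float entry points; h2o4gpu actually
# uses its own SWIG wrapper, and "libh2o4gpu_gpu.so" is a placeholder name.
import ctypes

lib = ctypes.CDLL("libh2o4gpu_gpu.so")  # placeholder, adjust to the real artifact

# void free_data_float(float **ptr);
lib.free_data_float.argtypes = [ctypes.POINTER(ctypes.POINTER(ctypes.c_float))]
lib.free_data_float.restype = None

# void copy_fecatorization_result_float(float *dst, const float **src, const int size);
lib.copy_fecatorization_result_float.argtypes = [
    ctypes.POINTER(ctypes.c_float),                  # dst: host buffer
    ctypes.POINTER(ctypes.POINTER(ctypes.c_float)),  # src: device pointer, passed by address
    ctypes.c_int,                                    # size: number of elements to copy
]
lib.copy_fecatorization_result_float.restype = None
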
217 changes: 156 additions & 61 deletions src/interface_py/h2o4gpu/solvers/factorization.py
@@ -11,62 +11,102 @@
import scipy.sparse


class FactorizationH2O(object):
'''[summary]
def _get_sparse_matrixes(X):
'''Create csc, csr and coo sparse matrices from any of these formats
Arguments:
object {[type]} -- [description]
X {array-like, csc, csr or coo sparse matrix}
Returns:
[type] -- [description]
csc, csr, coo
'''

X_coo = X_csc = X_csr = None
if scipy.sparse.isspmatrix_coo(X):
X_coo = X
X_csr = X_coo.tocsr(True)
X_csc = X_coo.tocsc(True)
elif scipy.sparse.isspmatrix_csr(X):
X_csr = X
X_csc = X_csr.tocsc(True)
X_coo = X_csr.tocoo(True)
elif scipy.sparse.isspmatrix_csc(X):
X_csc = X
X_csr = X_csc.tocsr(True)
X_coo = X_csc.tocoo(True)
else:
assert False, "only coo, csc and csr sparse matrixes are supported"
return X_csc, X_csr, X_coo


class FactorizationH2O(object):
'''Factors a sparse rating matrix X (m by n, with N_z non-zero elements)
into an m-by-f and an f-by-n matrix.
Arguments:
f {int} -- decomposition size
lambda_ {float} -- lambda regularization
Keyword Arguments:
max_iter {int} -- number of training iterations (default: {100})
double_precision {bool} -- use double precision, not yet supported (default: {False})
thetaT {array-like} shape (n, f) -- initial theta matrix (default: {None})
XT {array-like} shape (m, f) -- initial XT matrix (default: {None})
Attributes:
XT {array-like} shape (m, f) -- transposed X matrix, users' features
thetaT {array-like} shape (n, f) -- transposed theta matrix, items' features
'''

def __init__(self, f, lambda_, max_iter=100, double_precision=False, thetaT=None, XT=None):
self.max_iter = max_iter
assert not double_precision, 'double precision is not yet supported'
assert f % 10 == 0, 'f has to be a multiple of 10'
self.f = f
self.lambda_ = lambda_
self.double_precision = double_precision
self.dtype = np.float64 if self.double_precision else np.float32
self.thetaT = thetaT
self.XT = XT
self.max_iter = max_iter

def _load_lib(self):
from ..libs.lib_utils import GPUlib

gpu_lib = GPUlib().get(1)
return gpu_lib

def fit(self, X, X_test=None, X_BATCHES=1, THETA_BATCHES=1, early_stopping_rounds=None, verbose=False):
'''[summary]
def fit(self, X, y=None, X_test=None, X_BATCHES=1, THETA_BATCHES=1, early_stopping_rounds=None, verbose=False, scores=None):
#pylint: disable=unused-argument
'''Learn the model from rating matrix X
Arguments:
X {[type]} -- [description]
X {array-like, sparse matrix}, shape (m, n) -- Data matrix to be decomposed
Keyword Arguments:
X_test {[type]} -- [description] (default: {None})
X_BATCHES {int} -- [description] (default: {1})
THETA_BATCHES {int} -- [description] (default: {1})
early_stopping_rounds {[type]} -- [description] (default: {None})
verbose {bool} -- [description] (default: {False})
y -- ignored
X_test {array-like, coo sparse matrix}, shape (m, n) -- Data matrix for cross validation
X_BATCHES {int} -- batches to split XT (default: {1})
THETA_BATCHES {int} -- batches to split theta (default: {1})
early_stopping_rounds {int} -- Activates early stopping. Cross-validation error needs to decrease
at least every <early_stopping_rounds> round(s) to continue training. Requires X_test.
Returns the model from the last iteration (not the best one). If early stopping occurs,
the model will have three additional fields: best_cv_score, best_train_score and best_iteration.
verbose {bool} -- prints training and validation score (if applicable) on each iteration (default: {False})
scores {list} -- list of tuples with train, cv score for every iteration
'''

csc_X, csr_X, coo_X = _get_sparse_matrixes(X)

if early_stopping_rounds is not None:
assert X_test is not None, 'X_test is mandatory with early stopping'
assert scipy.sparse.isspmatrix_csc(
X), 'X must be a csc sparse scipy matrix'
if X_test is not None:
assert scipy.sparse.isspmatrix_coo(
X_test), 'X_test must be a coo sparse scipy matrix'
assert X.shape == X_test.shape
assert X_test.dtype == self.dtype

dtype = np.float64 if self.double_precision else np.float32

assert X.dtype == dtype
assert X_test.dtype == dtype

csc_X = X
csr_X = csc_X.tocsr(True)
coo_X = csc_X.tocoo(True)
assert X.dtype == self.dtype

coo_X_test = X_test

@@ -75,27 +115,32 @@ def fit(self, X, X_test=None, X_BATCHES=1, THETA_BATCHES=1, early_stopping_round
make_data = lib.make_factorization_data_double
run_step = lib.run_factorization_step_double
factorization_score = lib.factorization_score_double
copy_fecatorization_result = lib.copy_fecatorization_result_double
free_data = lib.free_data_double
else:
make_data = lib.make_factorization_data_float
run_step = lib.run_factorization_step_float
factorization_score = lib.factorization_score_float
copy_fecatorization_result = lib.copy_fecatorization_result_float
free_data = lib.free_data_float

m = coo_X.shape[0]
n = coo_X.shape[1]
nnz = csc_X.nnz
nnz_test = coo_X_test.nnz
if coo_X_test is None:
nnz_test = 0
else:
nnz_test = coo_X_test.nnz

if self.thetaT is None:
thetaT = np.random.rand(n, self.f).astype(dtype)
self.thetaT = np.random.rand(n, self.f).astype(self.dtype)
else:
thetaT = self.thetaT
assert thetaT.dtype == dtype
assert self.thetaT.dtype == self.dtype

if self.XT is None:
XT = np.random.rand(m, self.f).astype(dtype)
self.XT = np.random.rand(m, self.f).astype(self.dtype)
else:
XT = self.XT
XT.dtype = dtype
assert self.XT.dtype == self.dtype

csrRowIndexDevicePtr = None
csrColIndexDevicePtr = None
Expand All @@ -120,17 +165,20 @@ def fit(self, X, X_test=None, X_BATCHES=1, THETA_BATCHES=1, early_stopping_round
m, n, self.f, nnz, nnz_test, csr_X.indptr, csr_X.indices, csr_X.data,
csc_X.indices, csc_X.indptr, csc_X.data,
coo_X.row, coo_X.col, coo_X.data,
thetaT, XT, coo_X_test.row,
coo_X_test.col, coo_X_test.data, csrRowIndexDevicePtr, csrColIndexDevicePtr,
csrValDevicePtr, cscRowIndexDevicePtr, cscColIndexDevicePtr, cscValDevicePtr,
self.thetaT, self.XT, coo_X_test.row if coo_X_test is not None else None,
coo_X_test.col if coo_X_test is not None else None, coo_X_test.data if coo_X_test is not None else None,
csrRowIndexDevicePtr, csrColIndexDevicePtr, csrValDevicePtr, cscRowIndexDevicePtr, cscColIndexDevicePtr, cscValDevicePtr,
cooRowIndexDevicePtr, cooColIndexDevicePtr, cooValDevicePtr,
thetaTDevice, XTDevice, cooRowIndexTestDevicePtr,
cooColIndexTestDevicePtr, cooValTestDevicePtr)

assert status == 0, 'Failure uploading the data'

best_CV = np.inf
best_Iter = -1
self.best_train_score = np.inf
self.best_cv_score = np.inf
self.best_iteration = -1
cv_score = train_score = np.inf

for i in range(self.max_iter):
status = run_step(m,
n,
Expand All @@ -147,35 +195,82 @@ def fit(self, X, X_test=None, X_BATCHES=1, THETA_BATCHES=1, early_stopping_round
XTDevice,
X_BATCHES,
THETA_BATCHES)
result = factorization_score(m,
n,
self.f,
nnz,
self.lambda_,
thetaTDevice,
XTDevice,
cooRowIndexDevicePtr,
cooColIndexDevicePtr,
cooValDevicePtr)
train_score = result[0]
result = factorization_score(m,
n,
self.f,
nnz_test,
self.lambda_,
thetaTDevice,
XTDevice,
cooRowIndexTestDevicePtr,
cooColIndexTestDevicePtr,
cooValTestDevicePtr)
cv_score = result[0]
if verbose or scores is not None:
result = factorization_score(m,
n,
self.f,
nnz,
self.lambda_,
thetaTDevice,
XTDevice,
cooRowIndexDevicePtr,
cooColIndexDevicePtr,
cooValDevicePtr)
train_score = result[0]
if X_test is not None and (verbose or early_stopping_rounds is not None or scores is not None):
result = factorization_score(m,
n,
self.f,
nnz_test,
self.lambda_,
thetaTDevice,
XTDevice,
cooRowIndexTestDevicePtr,
cooColIndexTestDevicePtr,
cooValTestDevicePtr)
cv_score = result[0]
if verbose:
print("iteration {0} train: {1} cv: {2}".format(
i, train_score, cv_score))
if scores is not None:
scores.append((train_score, cv_score))

if early_stopping_rounds is not None:
if best_CV > cv_score:
best_CV = cv_score
best_Iter = i
if (i - best_Iter) > early_stopping_rounds:
if self.best_cv_score > cv_score:
self.best_cv_score = cv_score
self.best_train_score = train_score
self.best_iteration = i
if (i - self.best_iteration) > early_stopping_rounds:
if verbose:
print('best iteration:{0} train: {1} cv: {2}'.format(
self.best_iteration, self.best_train_score, self.best_cv_score))
break

lib.free_data_int(csrRowIndexDevicePtr)
lib.free_data_int(csrColIndexDevicePtr)
free_data(csrValDevicePtr)
lib.free_data_int(cscRowIndexDevicePtr)
lib.free_data_int(cscColIndexDevicePtr)
free_data(cscValDevicePtr)
lib.free_data_int(cooRowIndexDevicePtr)
lib.free_data_int(cooColIndexDevicePtr)
free_data(cooValDevicePtr)
lib.free_data_int(cooRowIndexTestDevicePtr)
lib.free_data_int(cooColIndexTestDevicePtr)
free_data(cooValTestDevicePtr)

copy_fecatorization_result(self.XT, XTDevice, m * self.f)
copy_fecatorization_result(self.thetaT, thetaTDevice, n * self.f)

free_data(thetaTDevice)
free_data(XTDevice)

return self

def predict(self, X):
'''Predict non-zero elements of coo sparse matrix X according to the fitted model
Arguments:
X {array-like, sparse coo matrix} shape (m, n)
Data matrix in coo format
Returns:
prediction : coo sparse matrix, shape (m, n)
'''

assert self.XT is not None and self.thetaT is not None, 'predict is invoked on an unfitted model'
assert scipy.sparse.isspmatrix_coo(
X), 'convert X to coo sparse matrix'
assert X.dtype == self.dtype
a = np.take(self.XT, X.row, axis=0)
b = np.take(self.thetaT, X.col, axis=0)
val = np.sum(a * b, axis=1)
return scipy.sparse.coo_matrix((val, (X.row, X.col)), shape=X.shape)
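
As a quick host-side illustration of what predict computes (no GPU required): each stored entry (i, j) of the returned coo matrix is the dot product of row i of XT with row j of thetaT. The snippet below reproduces that with hand-made factors; sizes and values are arbitrary.

# Host-only sketch of the computation inside predict(); arbitrary small data.
import numpy as np
import scipy.sparse

m, n, f = 4, 3, 10
rng = np.random.RandomState(42)
XT = rng.rand(m, f).astype(np.float32)      # stands in for a fitted model.XT
thetaT = rng.rand(n, f).astype(np.float32)  # stands in for a fitted model.thetaT

X = scipy.sparse.coo_matrix(np.array([[1, 0, 2],
                                      [0, 3, 0],
                                      [4, 0, 0],
                                      [0, 0, 5]], dtype=np.float32))

# same computation as predict() above
a = np.take(XT, X.row, axis=0)
b = np.take(thetaT, X.col, axis=0)
val = np.sum(a * b, axis=1)
pred = scipy.sparse.coo_matrix((val, (X.row, X.col)), shape=X.shape)

# entry-by-entry check: prediction (i, j) == XT[i] . thetaT[j]
for k, (i, j) in enumerate(zip(X.row, X.col)):
    assert np.isclose(val[k], XT[i] @ thetaT[j])
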