diff --git a/daal4py/sklearn/neighbors/kdtree_knn_classifier.py b/daal4py/sklearn/neighbors/kdtree_knn_classifier.py index 77a7d832f4..a53692814c 100644 --- a/daal4py/sklearn/neighbors/kdtree_knn_classifier.py +++ b/daal4py/sklearn/neighbors/kdtree_knn_classifier.py @@ -123,6 +123,7 @@ def fit(self, X, y): # Fit the model train_algo = d4p.kdtree_knn_classification_training(fptype=fptype, + nClasses=self.n_classes_, engine=d4p.engines_mcg59(seed=self.seed_)) train_result = train_algo.compute(X, y_) diff --git a/examples/adaboost_batch.py b/examples/adaboost_batch.py index cb8fb04462..c37a3b4cb6 100644 --- a/examples/adaboost_batch.py +++ b/examples/adaboost_batch.py @@ -33,9 +33,10 @@ def main(readcsv=read_csv, method='defaultDense'): infile = "./data/batch/adaboost_train.csv" testfile = "./data/batch/adaboost_test.csv" + nClasses = 2 # Configure a adaboost training object - train_algo = d4p.adaboost_training() + train_algo = d4p.adaboost_training(nClasses=nClasses) # Read data. Let's have 20 independent, and 1 dependent variable (for each observation) indep_data = readcsv(infile, range(20)) @@ -44,7 +45,7 @@ def main(readcsv=read_csv, method='defaultDense'): train_result = train_algo.compute(indep_data, dep_data) # Now let's do some prediction - predict_algo = d4p.adaboost_prediction() + predict_algo = d4p.adaboost_prediction(nClasses=nClasses) # read test data (with same #features) pdata = readcsv(testfile, range(20)) # now predict using the model from the training above diff --git a/examples/kdtree_knn_classification_batch.py b/examples/kdtree_knn_classification_batch.py index 44ce93b6b1..21cc6ae34c 100644 --- a/examples/kdtree_knn_classification_batch.py +++ b/examples/kdtree_knn_classification_batch.py @@ -38,11 +38,12 @@ def main(readcsv=read_csv, method='defaultDense'): # Read data. 
Let's use 5 features per observation nFeatures = 5 + nClasses = 5 train_data = readcsv(train_file, range(nFeatures)) train_labels = readcsv(train_file, range(nFeatures, nFeatures+1)) # Create an algorithm object and call compute - train_algo = d4p.kdtree_knn_classification_training() + train_algo = d4p.kdtree_knn_classification_training(nClasses=nClasses) # 'weights' is optional argument, let's use equal weights # in this case results must be the same as without weights weights = np.ones((train_data.shape[0], 1)) @@ -64,6 +65,6 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": (train_result, predict_result, predict_labels) = main() - print("KD-tree based kNN classification results (first 20 observations):") + print("KD-tree based kNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print("Classification results(observations #30-34):\n", predict_result.prediction[30:35]) diff --git a/examples/log_reg_dense_batch.py b/examples/log_reg_dense_batch.py index 40f9682710..a36cfa9871 100644 --- a/examples/log_reg_dense_batch.py +++ b/examples/log_reg_dense_batch.py @@ -51,8 +51,15 @@ def main(readcsv=read_csv, method='defaultDense'): predict_data = readcsv(testfile, range(nFeatures)) # set parameters and compute predictions - predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses, - resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities") + # previous version has different interface + from daal4py import __daal_link_version__ as dv + daal_version = tuple(map(int, (dv[0:4], dv[4:8]))) + if daal_version < (2020,0): + predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses, + resultsToCompute="computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities") + else: + predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses, + 
resultsToEvaluate="computeClassLabels|computeClassProbabilities|computeClassLogProbabilities") predict_result = predict_alg.compute(predict_data, train_result.model) # the prediction result provides prediction, probabilities and logProbabilities assert predict_result.probabilities.shape == (predict_data.shape[0], nClasses) diff --git a/examples/run_examples.py b/examples/run_examples.py index 5f89e59e34..25ee6811ce 100755 --- a/examples/run_examples.py +++ b/examples/run_examples.py @@ -41,11 +41,11 @@ req_version = defaultdict(lambda:(2019,0)) req_version['decision_forest_classification_batch.py'] = (2019,1) req_version['decision_forest_regression_batch.py'] = (2019,1) -req_version['adaboost_batch.py'] = (2020,1) -req_version['brownboost_batch.py'] = (2020,1) -req_version['logitboost_batch.py'] = (2020,1) -req_version['stump_classification_batch.py'] = (2020,1) -req_version['stump_regression_batch.py'] = (2020,1) +req_version['adaboost_batch.py'] = (2020,0) +req_version['brownboost_batch.py'] = (2020,0) +req_version['logitboost_batch.py'] = (2020,0) +req_version['stump_classification_batch.py'] = (2020,0) +req_version['stump_regression_batch.py'] = (2020,0) req_version['saga_batch.py'] = (2019,3) req_version['dbscan_batch.py'] = (2019,5) req_version['lasso_regression_batch.py'] = (2019,5) diff --git a/generator/gen_daal4py.py b/generator/gen_daal4py.py index 11481dfeaa..4e324fffe4 100644 --- a/generator/gen_daal4py.py +++ b/generator/gen_daal4py.py @@ -112,7 +112,7 @@ class cython_interface(object): 'serializeImpl', 'deserializeImpl', 'serialImpl', 'getEpsilonVal', 'getMinVal', 'getMaxVal', 'getPMMLNumType', 'getInternalNumType', 'getIndexNumType', 'allocateNumericTableImpl', 'allocateImpl', 'allocate', 'initialize', - 'setPartialResultStorage', 'addPartialResultStorage',] + 'setPartialResultStorage', 'addPartialResultStorage'] # files we ignore/skip ignore_files = ['daal_shared_ptr.h', 'daal.h', 'daal_win.h', 'algorithm_base_mode_batch.h', diff --git 
a/generator/parse.py b/generator/parse.py index 1122b37b67..3723757867 100644 --- a/generator/parse.py +++ b/generator/parse.py @@ -518,6 +518,8 @@ def parse_header(header, ignores): # first strip of eol comments if it is not the link if not re.search(r'https?://', l): l = l.split('//')[0] + # delete 'DAAL_DEPRECATED' + l = l.replace('DAAL_DEPRECATED ', '') # apply each parser, continue to next line if possible for p in parsers: if p.parse(l, ctxt): diff --git a/generator/wrappers.py b/generator/wrappers.py index 8c77bdfe6a..8e66664c71 100644 --- a/generator/wrappers.py +++ b/generator/wrappers.py @@ -20,9 +20,8 @@ # given a C++ namespace and a DAAL version, return if namespace/algo should be # wrapped in daal4py. def wrap_algo(algo, ver): - #return True if 'kmeans' in algo and not 'interface' in algo else False # Ignore some algos if using older DAAL - if ver < (2020, 1) and any(x in algo for x in ['stump', 'adaboost', 'brownboost', 'logitboost',]): + if ver < (2020, 0) and any(x in algo for x in ['adaboost', 'stump', 'brownboost', 'logitboost',]): return False # ignore deprecated version of stump if 'stump' in algo and not any(x in algo for x in ['stump::regression', 'stump::classification']): @@ -73,6 +72,8 @@ def wrap_algo(algo, ver): 'algorithms::optimization_solver::lbfgs': [('function', 'daal::algorithms::optimization_solver::sum_of_functions::BatchPtr')], 'algorithms::optimization_solver::adagrad': [('function', 'daal::algorithms::optimization_solver::sum_of_functions::BatchPtr')], 'algorithms::dbscan': [('epsilon', 'fptype'), ('minObservations', 'size_t')], + 'algorithms::adaboost::prediction': [('nClasses', 'size_t')], + 'algorithms::adaboost::training': [('nClasses', 'size_t')], } # Some algorithms have no public constructors and need to be instantiated with 'create' @@ -100,13 +101,15 @@ def wrap_algo(algo, ver): ignore = { 'algorithms::kmeans::init': ['firstIteration', 'outputForStep5Required',], # internal for distributed 
'algorithms::kmeans::init::interface1': ['nRowsTotal', 'offset', 'seed',], # internal for distributed, deprecated - 'algorithms::gbt::regression::training': ['dependentVariables'], # dependentVariables from parent class is not used + 'algorithms::gbt::regression::training': ['dependentVariables', 'weights'], # dependentVariables, weights from parent class are not used 'algorithms::decision_forest::training': ['seed',], # deprecated 'algorithms::decision_forest::classification::training': ['updatedEngine',], # output 'algorithms::decision_forest::regression::training': ['algorithms::regression::training::InputId', # InputId from parent class is not used 'updatedEngine',], # output 'algorithms::linear_regression::prediction': ['algorithms::linear_model::interceptFlag',], # parameter + 'algorithms::linear_regression::training': ['weights',], # weights from parent class is not used 'algorithms::ridge_regression::prediction': ['algorithms::linear_model::interceptFlag',], # parameter + 'algorithms::ridge_regression::training': ['weights',], # weights from parent class is not used 'algorithms::optimization_solver::sgd': ['optionalArgument', 'algorithms::optimization_solver::iterative_solver::OptionalResultId', 'pastUpdateVector', 'pastWorkValue', 'seed',], # internal stuff, deprecated 'algorithms::optimization_solver::lbfgs': ['optionalArgument', 'algorithms::optimization_solver::iterative_solver::OptionalResultId', @@ -126,6 +129,7 @@ def wrap_algo(algo, ver): 'algorithms::kdtree_knn_classification': ['seed',], # deprecated 'algorithms::lasso_regression::training': ['optionalArgument'], # internal stuff 'algorithms::lasso_regression::prediction': ['algorithms::linear_model::interceptFlag',], # parameter + 'algorithms::multi_class_classifier': ['algorithms::multi_class_classifier::getTwoClassClassifierModels',] # unsupported return type ModelPtr* } # List of InterFaces, classes that can be arguments to other algorithms diff --git a/tests/test_examples.py 
b/tests/test_examples.py index 8e24daee30..deb52fdf14 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -86,11 +86,11 @@ def test_svm_batch(self): gen_examples = [ - ('adaboost_batch', None, None, (2020, 1)), + ('adaboost_batch', None, None, (2020, 0)), ('adagrad_mse_batch', 'adagrad_mse_batch.csv', 'minimum'), ('association_rules_batch', 'association_rules_batch.csv', 'confidence'), ('bacon_outlier_batch', 'multivariate_outlier_batch.csv', lambda r: r[1].weights), - ('brownboost_batch', None, None, (2020, 1)), + ('brownboost_batch', None, None, (2020, 0)), ('correlation_distance_batch', 'correlation_distance_batch.csv', lambda r: [[np.amin(r.correlationDistance)], [np.amax(r.correlationDistance)], [np.mean(r.correlationDistance)], @@ -122,7 +122,7 @@ def test_svm_batch(self): ('linear_regression_streaming', 'linear_regression_batch.csv', lambda r: r[1].prediction), ('log_reg_binary_dense_batch', 'log_reg_binary_dense_batch.csv', lambda r: r[1].prediction), ('log_reg_dense_batch',), - ('logitboost_batch', None, None, (2020, 1)), + ('logitboost_batch', None, None, (2020, 0)), ('low_order_moms_dense_batch', 'low_order_moms_dense_batch.csv', lambda r: np.vstack((r.minimum, r.maximum, r.sum, @@ -164,8 +164,8 @@ def test_svm_batch(self): ('sgd_logistic_loss_batch', 'sgd_logistic_loss_batch.csv', 'minimum'), ('sgd_mse_batch', 'sgd_mse_batch.csv', 'minimum'), ('sorting_batch',), - ('stump_classification_batch', None, None, (2020, 1)), - ('stump_regression_batch', None, None, (2020, 1)), + ('stump_classification_batch', None, None, (2020, 0)), + ('stump_regression_batch', None, None, (2020, 0)), ('svm_multiclass_batch', 'svm_multiclass_batch.csv', lambda r: r[0].prediction), ('univariate_outlier_batch', 'univariate_outlier_batch.csv', lambda r: r[1].weights), ('dbscan_batch', 'dbscan_batch.csv', 'assignments', (2019, 5)), @@ -199,6 +199,8 @@ def call(self, ex): # some algos do not support CSR matrices if ex.__name__.startswith('sorting'): 
self.skipTest("not supporting CSR") + if any (ex.__name__.startswith(x) for x in ['adaboost', 'brownboost', 'stump_classification']): + self.skipTest("not supporting CSR") method = 'singlePassCSR' if any(x in ex.__name__ for x in ['low_order_moms', 'covariance']) else 'fastCSR' # cannot use fastCSR ofr implicit als; bug in Intel(R) DAAL? if 'implicit_als' in ex.__name__: