diff --git a/examples/gradient_boosted_classification_batch.py b/examples/gradient_boosted_classification_batch.py
index 4261e6b776..6dcbd7b093 100644
--- a/examples/gradient_boosted_classification_batch.py
+++ b/examples/gradient_boosted_classification_batch.py
@@ -40,10 +40,20 @@ def main(readcsv=read_csv, method='defaultDense'):
     testfile = "./data/batch/df_classification_test.csv"
 
     # Configure a training object (5 classes)
-    train_algo = d4p.gbt_classification_training(nClasses=nClasses,
-                                                 maxIterations=maxIterations,
-                                                 minObservationsInLeafNode=minObservationsInLeafNode,
-                                                 featuresPerNode=nFeatures)
+    # Previous versions have a different interface
+    from daal4py import __daal_link_version__ as dv
+    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
+    if daal_version < (2020,0):
+        train_algo = d4p.gbt_classification_training(nClasses=nClasses,
+                                                     maxIterations=maxIterations,
+                                                     minObservationsInLeafNode=minObservationsInLeafNode,
+                                                     featuresPerNode=nFeatures)
+    else:
+        train_algo = d4p.gbt_classification_training(nClasses=nClasses,
+                                                     maxIterations=maxIterations,
+                                                     minObservationsInLeafNode=minObservationsInLeafNode,
+                                                     featuresPerNode=nFeatures,
+                                                     varImportance='weight|totalCover|cover|totalGain|gain')
 
     # Read data. Let's use 3 features per observation
     data = readcsv(infile, range(3), t=np.float32)
@@ -51,7 +61,12 @@ def main(readcsv=read_csv, method='defaultDense'):
     train_result = train_algo.compute(data, labels)
 
     # Now let's do some prediction
-    predict_algo = d4p.gbt_classification_prediction(5)
+    # Previous versions have a different interface
+    if daal_version < (2020,0):
+        predict_algo = d4p.gbt_classification_prediction(nClasses=nClasses)
+    else:
+        predict_algo = d4p.gbt_classification_prediction(nClasses=nClasses,
+                                                         resultsToEvaluate="computeClassLabels|computeClassProbabilities")
     # read test data (with same #features)
     pdata = readcsv(testfile, range(3), t=np.float32)
     # now predict using the model from the training above
@@ -68,4 +83,14 @@
     (train_result, predict_result, plabels) = main()
     print("\nGradient boosted trees prediction results (first 10 rows):\n", predict_result.prediction[0:10])
     print("\nGround truth (first 10 rows):\n", plabels[0:10])
+    # These results are available only in newer versions
+    from daal4py import __daal_link_version__ as dv
+    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))
+    if daal_version >= (2020,0):
+        print("\nGradient boosted trees prediction probabilities (first 10 rows):\n", predict_result.probabilities[0:10])
+        print("\nvariableImportanceByWeight:\n", train_result.variableImportanceByWeight)
+        print("\nvariableImportanceByTotalCover:\n", train_result.variableImportanceByTotalCover)
+        print("\nvariableImportanceByCover:\n", train_result.variableImportanceByCover)
+        print("\nvariableImportanceByTotalGain:\n", train_result.variableImportanceByTotalGain)
+        print("\nvariableImportanceByGain:\n", train_result.variableImportanceByGain)
     print('All looks good!')
diff --git a/generator/gen_daal4py.py b/generator/gen_daal4py.py
index 4e324fffe4..3f11f2cd11 100644
--- a/generator/gen_daal4py.py
+++ b/generator/gen_daal4py.py
@@ -29,7 +29,7 @@
 from collections import defaultdict, OrderedDict
 from jinja2 import Template
 from .parse import parse_header, parse_version
-from .wrappers import required, ignore, defaults, has_dist, ifaces, no_warn, no_constructor, add_setup, enum_maps, wrap_algo
+from .wrappers import required, ignore, defaults, has_dist, ifaces, no_warn, no_constructor, add_setup, enum_maps, enum_params, wrap_algo
 from .wrapper_gen import wrapper_gen, typemap_wrapper_template
 from .format import mk_var
 
@@ -265,11 +265,13 @@ def get_all_attrs(self, ns, cls, attr, ons=None):
 
     ###############################################################################
-    def to_lltype(self, t):
+    def to_lltype(self, p, t):
         """
         return low level (C++ type). Usually the same as input.
         Only very specific cases need a conversion.
         """
+        if p in enum_params:
+            return enum_params[p]
         if t in ['DAAL_UINT64']:
             return 'ResultToComputeId'
         return t
@@ -285,9 +287,6 @@ def to_hltype(self, ns, t):
         '?' means we do not know what 't' is
         For classes, we also add lookups in namespaces that DAAL C++ API finds through "using".
         """
-        if t in ['DAAL_UINT64']:
-            ### FIXME
-            t = 'ResultToComputeId'
         tns, tname = splitns(t)
         if t in ['double', 'float', 'int', 'size_t',]:
             return (t, 'stdtype', '')
@@ -681,10 +680,10 @@ def prepare_hlwrapper(self, ns, mode, func, no_dist, no_stream):
         for p in all_params:
             pns, tmp = splitns(p)
             if not tmp.startswith('_') and not ignored(pns, tmp):
-                hlt = self.to_hltype(pns, all_params[p][0])
+                llt = self.to_lltype(p, all_params[p][0])
+                hlt = self.to_hltype(pns, llt)
                 if hlt and hlt[1] in ['stdtype', 'enum', 'class']:
                     (hlt, hlt_type, hlt_ns) = hlt
-                    llt = self.to_lltype(all_params[p][0])
                     pval = None
                     if hlt_type == 'enum':
                         thetype = hlt_ns + '::' + llt.rsplit('::', 1)[-1]
diff --git a/generator/wrappers.py b/generator/wrappers.py
index 8e66664c71..e3ddd3d1a7 100644
--- a/generator/wrappers.py
+++ b/generator/wrappers.py
@@ -182,6 +182,16 @@ def wrap_algo(algo, ver):
     'algorithms::pca::ResultToComputeId' : 'result_dataForTransform',
 }
 
+# Enums are used as values that define a bit-mask in a Parameter.
+# The Parameter itself is declared as DAAL_UINT64, so its possible values
+# cannot be deduced from the type alone. This dict maps such a Parameter to
+# the enum that holds its values; a DAAL_UINT64 parameter not listed here is
+# assumed to be a 'ResultToComputeId'.
+# Parameter -> enum of values
+enum_params = {
+    'algorithms::gbt::classification::training::varImportance': 'algorithms::gbt::training::VariableImportanceModes',
+    'algorithms::gbt::regression::training::varImportance': 'algorithms::gbt::training::VariableImportanceModes',
+}
+
 # The distributed algorithm configuration parameters
 # Note that all have defaults and so are optional.
 # In particular note that the name of a single input argument defaults to data.
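
Note on the version-gating pattern this patch applies in the example: the linked
DAAL version is recovered from daal4py's __daal_link_version__ string and
compared as a (year, update) tuple, so parameters that only newer builds accept
can be passed conditionally. Below is a minimal, self-contained sketch of that
gate, assuming daal4py is installed and that the first eight characters of the
version string are YYYYUUUU, as the slicing in the diff implies; the training
parameter values here are placeholders, not the example's actual settings.

    import daal4py as d4p
    from daal4py import __daal_link_version__ as dv

    # Build a comparable (year, update) tuple, e.g. "20200000..." -> (2020, 0)
    daal_version = tuple(map(int, (dv[0:4], dv[4:8])))

    # Collect the keyword arguments once, adding the varImportance bit-mask
    # only when the linked DAAL is new enough to understand it.
    train_kwargs = dict(nClasses=5, maxIterations=100)
    if daal_version >= (2020, 0):
        train_kwargs['varImportance'] = 'weight|totalGain'
    train_algo = d4p.gbt_classification_training(**train_kwargs)

Building a kwargs dict and branching only on the new parameter is an
alternative to the twin constructor calls in the diff; it avoids duplicating
the shared arguments in both branches.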