diff --git a/docs/thirdparty.rst b/docs/thirdparty.rst index 0dc3d4c..fd9d9a6 100644 --- a/docs/thirdparty.rst +++ b/docs/thirdparty.rst @@ -157,7 +157,7 @@ Once they're in Xcessiv, TPOT pipelines are just regular base learners you can t Create and finalize a preset Logistic Regression base learner. We'll use this to stack the base learners together. -Let's begin by stacking together the two highest performers. the ExtraTreesClassifier and the KNeighborsClassifier without the original features. Right off the bat, cross-validating on the secondary meta-features yields an accuracy of 0.9975. +Let's begin by stacking together the two highest performers, the ExtraTreesClassifier and the KNeighborsClassifier without the original features. Right off the bat, cross-validating on the secondary meta-features yields an accuracy of 0.9975. Going further, let's see if adding the less effective (on its own) Linear SVM will prove useful to our small ensemble. Running it, we get an even better 0.9992 accuracy. diff --git a/docs/walkthrough.rst b/docs/walkthrough.rst index 882d113..acfa7b7 100644 --- a/docs/walkthrough.rst +++ b/docs/walkthrough.rst @@ -427,3 +427,46 @@ Here's a complete list of what happens when Xcessiv creates a new ensemble. Note And that's it! Try experimenting with more base learners, appending the original features to the meta-features, and even changing the type of your secondary learner. Push that accuracy up as high as you possibly can! Normally, it would take a lot of extraneous code just to set things up and keep track of everything you try, but Xcessiv takes care of all the dirty work so you can focus solely on the important thing, constructing your ultimate ensemble. + +Exporting your stacked ensemble +------------------------------- + +Let's say that after trying out different stacked ensemble combinations, you think you've found the one. It wouldn't be very useful if you didn't have a way to use it on other data to generate predictions. 
Xcessiv offers a way to convert any stacked ensemble into an importable Python package. Click on the export icon of your chosen ensemble, and enter a unique package name to save your package as. + +Give your package a unique name that conforms to Python package naming conventions. For example, we obviously wouldn't want to name our package "numpy" or "my.package". In this walkthrough, we might save our package as "DigitsDataEnsemble1". + +On successful export, Xcessiv will automatically save your package inside your project folder. + +Your ensemble can then be imported from :mod:`DigitsDataEnsemble1` like this:: + + # Make sure DigitsDataEnsemble1 is importable + from DigitsDataEnsemble1 import xcessiv_ensemble + +``xcessiv_ensemble`` will then contain a stacked ensemble instance with the methods ``get_params``, ``set_params``, ``fit``, and the ensemble's secondary learner's meta-feature generator method. For example, if your secondary learner's meta-feature generator method is ``predict``, you'll be able to call :func:`xcessiv_ensemble.predict` after fitting. + +Here's an example of how you'd normally use an imported ensemble:: + + from DigitsDataEnsemble1 import xcessiv_ensemble + + # Fit all base learners and secondary learner on training data + xcessiv_ensemble.fit(X_train, y_train) + + # Generate some predictions on test/unseen data + predictions = xcessiv_ensemble.predict(X_test) + +Many common use cases for ``xcessiv_ensemble`` will involve using a method other than the configured meta-feature generator. Take the case of using :class:`sklearn.linear_model.LogisticRegression` as our secondary learner. :class:`sklearn.linear_model.LogisticRegression` has both methods :func:`predict` and :func:`predict_proba`, but if our meta-feature generator is set to :func:`predict`, Xcessiv doesn't know :func:`predict_proba` actually exists and only :func:`xcessiv_ensemble.predict` will be a valid method. 
For these cases, ``xcessiv_ensemble`` exposes a method :func:`_process_using_meta_feature_generator` you can use in the following way.:: + + from DigitsDataEnsemble1 import xcessiv_ensemble + + # Fit all base learners and secondary learner on training data + xcessiv_ensemble.fit(X_train, y_train) + + # Generate some prediction probabilities on test/unseen data + probas = xcessiv_ensemble._process_using_meta_feature_generator(X_test, 'predict_proba') + +You'll notice that ``xcessiv_ensemble`` follows the **scikit-learn** interface for estimators. That means you'll be able to use it as its own standalone base learner. If you're crazy enough, you can even try *stacking together already stacked ensembles*. For now, the recommended way of quickly adding your stacked ensemble as a separate base learner is to write something like this in your base learner setup.:: + + # Make sure DigitsDataEnsemble1 is importable + from DigitsDataEnsemble1 import xcessiv_ensemble + + base_learner = xcessiv_ensemble diff --git a/setup.py b/setup.py index b94b106..4265a81 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def run_tests(self): setup( name='xcessiv', - version='0.3.4', + version='0.3.5', url='https://github.com/reiinakano/xcessiv', license='Apache License 2.0', author='Reiichiro Nakano', diff --git a/xcessiv/__init__.py b/xcessiv/__init__.py index 9fb0768..b0bcb03 100644 --- a/xcessiv/__init__.py +++ b/xcessiv/__init__.py @@ -2,7 +2,7 @@ from flask import Flask -__version__ = '0.3.4' +__version__ = '0.3.5' app = Flask(__name__, static_url_path='/static', static_folder='ui/build/static') diff --git a/xcessiv/models.py b/xcessiv/models.py index 039a541..aea9305 100644 --- a/xcessiv/models.py +++ b/xcessiv/models.py @@ -194,6 +194,28 @@ def cleanup(self, path): for learner in self.base_learners: learner.cleanup(path) + def export_as_file(self, filepath, hyperparameters): + """Generates a Python file with the importable base learner set to ``hyperparameters`` + + This 
function generates a Python file in the specified file path that contains + the base learner as an importable variable stored in ``base_learner``. The base + learner will be set to the appropriate hyperparameters through ``set_params``. + + Args: + filepath (str, unicode): File path to save file in + + hyperparameters (dict): Dictionary to use for ``set_params`` + """ + if not filepath.endswith('.py'): + filepath += '.py' + + file_contents = '' + file_contents += self.source + file_contents += '\n\nbase_learner.set_params(**{})\n'.format(hyperparameters) + file_contents += '\nmeta_feature_generator = "{}"\n'.format(self.meta_feature_generator) + with open(filepath, 'wb') as f: + f.write(file_contents.encode('utf8')) + class AutomatedRun(Base): """This table contains initialized/completed automated hyperparameter searches""" @@ -315,6 +337,18 @@ def cleanup(self, path): """ self.delete_meta_features(path) + def export_as_file(self, filepath): + """Generates a Python file with the importable base learner + + This function generates a Python file in the specified file path that contains + the base learner as an importable variable stored in ``base_learner``. The base + learner will be set to the appropriate hyperparameters through ``set_params``. + + Args: + filepath (str, unicode): File path to save file in + """ + self.base_learner_origin.export_as_file(filepath, self.hyperparameters) + class StackedEnsemble(Base): """This table contains StackedEnsembles created in the xcessiv notebook""" @@ -356,6 +390,94 @@ def return_secondary_learner(self): estimator = estimator.set_params(**self.secondary_learner_hyperparameters) return estimator + def export_as_package(self, package_path, cv_source): + """Exports the ensemble as a Python package and saves it to `package_path`. 
+ + Args: + package_path (str, unicode): Absolute/local path of place to save package in + + cv_source (str, unicode): String containing actual code for base learner + cross-validation used to generate secondary meta-features. + + Raises: + exceptions.UserError: If os.path.join(path, name) already exists. + """ + if os.path.exists(package_path): + raise exceptions.UserError('{} already exists'.format(package_path)) + + package_name = os.path.basename(os.path.normpath(package_path)) + + os.makedirs(package_path) + + # Write __init__.py + with open(os.path.join(package_path, '__init__.py'), 'wb') as f: + f.write('from {}.builder import xcessiv_ensemble'.format(package_name).encode('utf8')) + + # Create package baselearners with each base learner having its own module + os.makedirs(os.path.join(package_path, 'baselearners')) + open(os.path.join(package_path, 'baselearners', '__init__.py'), 'a').close() + for idx, base_learner in enumerate(self.base_learners): + base_learner.export_as_file(os.path.join(package_path, + 'baselearners', + 'baselearner' + str(idx))) + + # Create metalearner.py containing secondary learner + self.base_learner_origin.export_as_file( + os.path.join(package_path, 'metalearner'), + self.secondary_learner_hyperparameters + ) + + # Create cv.py containing CV method for getting meta-features + with open(os.path.join(package_path, 'cv.py'), 'wb') as f: + f.write(cv_source.encode('utf8')) + + # Create stacker.py containing class for Xcessiv ensemble + ensemble_source = '' + stacker_file_loc = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'stacker.py') + with open(stacker_file_loc) as f: + ensemble_source += f.read() + + ensemble_source += '\n\n' \ + ' def {}(self, X):\n' \ + ' return self._process_using_' \ + 'meta_feature_generator(X, "{}")\n\n'\ + .format(self.base_learner_origin.meta_feature_generator, + self.base_learner_origin.meta_feature_generator) + + with open(os.path.join(package_path, 'stacker.py'), 'wb') as f: + 
f.write(ensemble_source.encode('utf8')) + + # Create builder.py containing file where `xcessiv_ensemble` is instantiated for import + builder_source = '' + + for idx, base_learner in enumerate(self.base_learners): + builder_source += 'from {}.baselearners import baselearner{}\n'.format(package_name, idx) + + builder_source += 'from {}.cv import return_splits_iterable\n'.format(package_name) + + builder_source += 'from {} import metalearner\n'.format(package_name) + + builder_source += 'from {}.stacker import XcessivStackedEnsemble\n'.format(package_name) + + builder_source += '\nbase_learners = [\n' + for idx, base_learner in enumerate(self.base_learners): + builder_source += ' baselearner{}.base_learner,\n'.format(idx) + builder_source += ']\n' + + builder_source += '\nmeta_feature_generators = [\n' + for idx, base_learner in enumerate(self.base_learners): + builder_source += ' baselearner{}.meta_feature_generator,\n'.format(idx) + builder_source += ']\n' + + builder_source += '\nxcessiv_ensemble = XcessivStackedEnsemble(base_learners=base_learners,' \ + ' meta_feature_generators=meta_feature_generators,' \ + ' secondary_learner=metalearner.base_learner,' \ + ' cv_function=return_splits_iterable,' \ + ' append_original={})\n'.format(self.append_original) + + with open(os.path.join(package_path, 'builder.py'), 'wb') as f: + f.write(builder_source.encode('utf8')) + @property def serialize(self): return dict( diff --git a/xcessiv/stacker.py b/xcessiv/stacker.py new file mode 100644 index 0000000..0ae3390 --- /dev/null +++ b/xcessiv/stacker.py @@ -0,0 +1,112 @@ +from __future__ import absolute_import, print_function, division, unicode_literals +from sklearn.pipeline import _BasePipeline +import numpy as np + + +class XcessivStackedEnsemble(_BasePipeline): + """Contains the class for the Xcessiv stacked ensemble""" + def __init__(self, base_learners, meta_feature_generators, + secondary_learner, cv_function, append_original): + super(XcessivStackedEnsemble, 
self).__init__() + + self.base_learners = base_learners + self.meta_feature_generators = meta_feature_generators + self.secondary_learner = secondary_learner + self.cv_function = cv_function + self.append_original = append_original + self._named_learners = [('bl{}'.format(idx), base_learner) for idx, base_learner + in enumerate(base_learners)] + self._named_learners.append(('secondary-learner', secondary_learner)) + + def get_params(self, deep=True): + """Get parameters for this estimator. + + Args: + + deep (boolean, optional): If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + params: mapping of string to any Parameter names mapped to their values. + """ + return self._get_params('_named_learners', deep=deep) + + def set_params(self, **params): + """Set the parameters of this estimator.""" + self._set_params('_named_learners', **params) + return self + + def fit(self, X, y): + print('Fitting {} base learners'.format(len(self.base_learners))) + + all_learner_meta_features = [] + for idx, base_learner in enumerate(self.base_learners): + + single_learner_meta_features = [] + test_indices = [] + for num, (train_idx, test_idx) in enumerate(self.cv_function(X, y)): + print('Fold {} of base learner {}'.format(num+1, idx+1)) + + base_learner.fit(X[train_idx], y[train_idx]) + + preds = getattr(base_learner, self.meta_feature_generators[idx])(X[test_idx]) + + if len(preds.shape) == 1: + preds = preds.reshape(-1, 1) + + single_learner_meta_features.append( + preds + ) + + test_indices.append(test_idx) + + single_learner_meta_features = np.concatenate(single_learner_meta_features) + all_learner_meta_features.append(single_learner_meta_features) + + all_learner_meta_features = np.concatenate(all_learner_meta_features, axis=1) + test_indices = np.concatenate(test_indices) # reorganized order due to CV + + print('Fitting meta-learner') + + if self.append_original: + all_learner_meta_features = np.concatenate( + 
(all_learner_meta_features, X[test_indices]), + axis=1 + ) + + self.secondary_learner.fit(all_learner_meta_features, y[test_indices]) + + return self + + def _process_using_meta_feature_generator(self, X, meta_feature_generator): + """Process using secondary learner meta-feature generator + + Since secondary learner meta-feature generator can be anything e.g. predict, predict_proba, + this internal method gives the ability to use any string. Just make sure secondary learner + has the method. + + Args: + X (array-like): Features array + + meta_feature_generator (str, unicode): Method for use by secondary learner + """ + + all_learner_meta_features = [] + for idx, base_learner in enumerate(self.base_learners): + single_learner_meta_features = getattr(base_learner, + self.meta_feature_generators[idx])(X) + + if len(single_learner_meta_features.shape) == 1: + single_learner_meta_features = single_learner_meta_features.reshape(-1, 1) + all_learner_meta_features.append(single_learner_meta_features) + + all_learner_meta_features = np.concatenate(all_learner_meta_features, axis=1) + if self.append_original: + all_learner_meta_features = np.concatenate( + (all_learner_meta_features, X), + axis=1 + ) + + out = getattr(self.secondary_learner, meta_feature_generator)(all_learner_meta_features) + + return out diff --git a/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js b/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js index 16f4a17..e0359c2 100644 --- a/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js +++ b/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js @@ -1,7 +1,8 @@ import React, {Component} from 'react'; import './Ensemble.css'; import 'react-select/dist/react-select.css'; -import { Modal, Panel, Button, Alert } from 'react-bootstrap'; +import { Modal, Panel, Button, Alert, Form, + FormGroup, ControlLabel, FormControl } from 'react-bootstrap'; function DisplayError(props) { @@ -104,4 +105,54 @@ export class DeleteModal extends Component { } } +export class 
ExportModal extends Component { + constructor(props) { + super(props); + this.state = { + name: '' + }; + } + + handleYesAndClose() { + this.props.handleYes(this.state.name); + this.props.onRequestClose(); + } + + render() { + + return ( + + + Export ensemble as Python package + + +
{ + e.preventDefault(); + this.handleYesAndClose(); + }}> + + Name to use as package name + this.setState({name: evt.target.value})} + /> + +
+
+ + + + +
/**
 * Pass a successful fetch response through unchanged; turn a failed one into
 * an Error carrying a user-presentable `errMessage`.
 *
 * - `response.ok`: the response itself is returned.
 * - status 500: throws synchronously with `errMessage` set to
 *   'Unexpected error'; the body is not read.
 * - any other failure: returns a promise that rejects with the Error, whose
 *   `errMessage` is the JSON-stringified response body. If the body is not
 *   valid JSON (for example an HTML error page), `errMessage` falls back to
 *   'Unexpected error' instead of surfacing a parse error with no message.
 *
 * In every failure case `error.message` is `response.statusText`.
 */
function handleErrors(response) {
  if (response.ok) {
    return response;
  }

  const error = new Error(response.statusText);

  // Unexpected error
  if (response.status === 500) {
    error.errMessage = 'Unexpected error';
    throw error;
  }

  return response.json()
    .then(
      errorBody => JSON.stringify(errorBody),
      // The second handler only covers a failed body parse, so it cannot
      // swallow the Error thrown in the next step.
      () => 'Unexpected error'
    )
    .then(errMessage => {
      error.errMessage = errMessage;
      throw error;
    });
}
@app.route('/ensemble/stacked/<int:id>/export/', methods=['POST'])
def export_stacked_ensemble(id):
    """Export the stacked ensemble with the given id as a Python package.

    The project folder is taken from the query string and the package name
    from the ``name`` key of the JSON request body. The package is written to
    ``<project folder>/<name>``.

    Args:
        id (int): ID of the stacked ensemble to export (taken from the URL).

    Returns:
        A JSON response with a ``message`` describing where the package was saved.

    Raises:
        exceptions.UserError: With status 404 if the stacked ensemble does not
            exist; with status 400 if ``name`` is missing or is not a valid,
            non-keyword Python identifier.
    """
    # Function-scope imports keep this block self-contained.
    import keyword
    import re

    path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
        if stacked_ensemble is None:
            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)

        extraction = session.query(models.Extraction).first()

        req_body = request.get_json()
        name = req_body.get('name') if isinstance(req_body, dict) else None

        # The name becomes both a directory under the project folder and a
        # module name in generated import statements. Restricting it to a plain
        # identifier rejects path separators, "..", absolute paths and names
        # that could never be imported.
        try:
            valid_name = (re.match(r'[A-Za-z_][A-Za-z0-9_]*\Z', name) is not None
                          and not keyword.iskeyword(name))
        except TypeError:
            # name is missing or is not a string
            valid_name = False
        if not valid_name:
            raise exceptions.UserError(
                'Package name must be a valid Python identifier', 400
            )

        stacked_ensemble.export_as_package(os.path.join(path, name),
                                           extraction.meta_feature_generation['source'])
        return jsonify(message='Stacked ensemble successfully '
                               'exported as package {} in {}'.format(
                                    name, path
                               ))