diff --git a/README.md b/README.md
index 03c8aab..c7f6b31 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ Xcessiv holds your hand through all the implementation details of creating and o
 * Easy management and comparison of hundreds of different model-hyperparameter combinations
 * Automatic saving of generated secondary meta-features
 * Stacked ensemble creation in a few clicks
-* Export your stacked ensemble as a standalone Python package
+* Export your stacked ensemble as a standalone Python file to support multiple levels of stacking
 
 ## Installation and Documentation
diff --git a/docs/index.rst b/docs/index.rst
index ea54cb5..e13404a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -20,7 +20,7 @@ Features
 * Easy management and comparison of hundreds of different model-hyperparameter combinations
 * Automatic saving of generated secondary meta-features
 * Stacked ensemble creation in a few clicks
-* Export your stacked ensemble as a standalone Python package
+* Export your stacked ensemble as a standalone Python file to support multiple levels of stacking
 
 ----------------
diff --git a/docs/walkthrough.rst b/docs/walkthrough.rst
index acfa7b7..b668c52 100644
--- a/docs/walkthrough.rst
+++ b/docs/walkthrough.rst
@@ -431,42 +431,51 @@ Normally, it would take a lot of extraneous code just to set things up and keep
 Exporting your stacked ensemble
 -------------------------------
 
-Let's say that after trying out different stacked ensemble combinations, you think you've found the one. It wouldn't be very useful if you didn't have a way to use it on other data to generate predictions. Xcessiv offers a way to convert any stacked ensemble into an importable Python package. Click on the export icon of your chosen ensemble, and enter a unique package name to save your package as.
+As a Python file
+~~~~~~~~~~~~~~~~
 
-Give your package name a unique name that conforms to Python package naming conventions. For example, we obviously wouldn't want to name our package "numpy" or "my.package". In this walkthrough, we might save our package as "DigitsDataEnsemble1".
+Let's say that after trying out different stacked ensemble combinations, you think you've found the one. It wouldn't be very useful if you didn't have a way to use it on other data to generate predictions. Xcessiv offers a way to convert any stacked ensemble into an importable Python file. Click on the export icon of your chosen ensemble, and enter a unique name to save your file as.
 
-On successful export, Xcessiv will automatically save your package inside your project folder.
+In this walkthrough, we'll save our ensemble as "myensemble.py".
 
-Your ensemble can then be imported from :class:`DigitsDataEnsemble1` like this.::
+On successful export, Xcessiv will automatically save your Python file inside your project folder.
 
-    # Make sure DigitsDataEnsemble1 is importable
-    from DigitsDataEnsemble1 import xcessiv_ensemble
+Your ensemble can then be imported from :class:`myensemble.py` like this.::
 
-``xcessiv_ensemble`` will then contain a stacked ensemble instance with the methods ``get_params``, ``set_params``, ``fit``, and the ensemble's secondary learner's meta-feature generator method. For example, if your secondary learner's meta-feature generator method is ``predict``, you'll be able to call :func:`xcessiv_ensemble.predict` after fitting.
+    # Make sure myensemble.py is importable
+    from myensemble import base_learner
+
+``base_learner`` will then contain a stacked ensemble instance with the methods ``get_params``, ``set_params``, ``fit``, and the ensemble's secondary learner's meta-feature generator method. For example, if your secondary learner's meta-feature generator method is ``predict``, you'll be able to call :func:`base_learner.predict` after fitting.
 
 Here's an example of how you'd normally use an imported ensemble.::
 
-    from DigitsDataEnsemble1 import xcessiv_ensemble
+    from myensemble import base_learner
 
     # Fit all base learners and secondary learner on training data
-    xcessiv_ensemble.fit(X_train, y_train)
+    base_learner.fit(X_train, y_train)
 
     # Generate some predictions on test/unseen data
-    predictions = xcessiv_ensemble.predict(X_test)
+    predictions = base_learner.predict(X_test)
 
-Most common use cases for ``xcessiv_ensemble`` will involve using a method other than the configured meta-feature generator. Take the case of using :class:`sklearn.linear_model.LogisticRegression` as our secondary learner. :class:`sklearn.linear_model.LogisticRegression` has both methods :func:`predict` and :func:`predict_proba`, but if our meta-feature generator is set to :func:`predict`, Xcessiv doesn't know :func:`predict_proba` actually exists and only :func:`xcessiv_ensemble.predict` will be a valid method. For these cases, ``xcessiv_ensemble`` exposes a method :func:`_process_using_meta_feature_generator` you can use in the following way.::
+Most common use cases for ``base_learner`` will involve using a method other than the configured meta-feature generator. Take the case of using :class:`sklearn.linear_model.LogisticRegression` as our secondary learner. :class:`sklearn.linear_model.LogisticRegression` has both methods :func:`predict` and :func:`predict_proba`, but if our meta-feature generator is set to :func:`predict`, Xcessiv doesn't know :func:`predict_proba` actually exists and only :func:`base_learner.predict` will be a valid method. For these cases, ``base_learner`` exposes a method :func:`_process_using_meta_feature_generator` you can use in the following way.::
 
-    from DigitsDataEnsemble1 import xcessiv_ensemble
+    from myensemble import base_learner
 
     # Fit all base learners and secondary learner on training data
-    xcessiv_ensemble.fit(X_train, y_train)
+    base_learner.fit(X_train, y_train)
 
     # Generate some prediction probabilities on test/unseen data
-    probas = xcessiv_ensemble._process_using_meta_feature_generator(X_test, 'predict_proba')
+    probas = base_learner._process_using_meta_feature_generator(X_test, 'predict_proba')
+
+As a standalone base learner setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You'll notice that ``base_learner`` follows the **scikit-learn** interface for estimators. That means you'll be able to use it as its own standalone base learner. If you're crazy enough, you can even try *stacking together already stacked ensembles*.
+
+In fact, Xcessiv has built in functionality to directly export your stacked ensemble as a standalone base learner setup.
 
-You'll notice that ``xcessiv_ensemble`` follows the **scikit-learn** interface for estimators. That means you'll be able to use it as its own standalone base learner. If you're crazy enough, you can even try *stacking together already stacked ensembles*. For now, the recommended way of quickly adding your stacked ensemble as a separate base learner is to write something like this in your base learner setup.::
+In the **Export ensemble** modal, simply click on **Export as separate base learner setup**. A new base learner setup will be created containing source code for the selected stacked ensemble. At this point, you'll be able to use it just like any other base learner. Rename it, add any relevant metrics, tune it, and stack it!
 
-    # Make sure DigitsDataEnsemble1 is importable
-    from DigitsDataEnsemble1 import xcessiv_ensemble
+.. warning::
 
-    base_learner = xcessiv_ensemble
+    Xcessiv's export functionality works by simply concatenating the source code for the different base learners and your cross-validation scheme. While this is not a problem in most cases, things *can* break. For example, if a base learner's source code starts with ``from __future__ import``, it will *not* end up on the first line and this will need to be manually edited out in the exported file.
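Because the exported ``base_learner`` exposes ``get_params``, ``set_params``, ``fit``, and the configured meta-feature generator method, it should also plug into ordinary scikit-learn tooling. A minimal sketch, not part of this patch: the digits data, the 5-fold split, and ``scoring='accuracy'`` are illustrative assumptions, and the meta-feature generator is assumed to be ``predict``::

    # Sketch: treat the exported stacked ensemble like any other scikit-learn estimator.
    # Assumes the ensemble above was exported to an importable myensemble.py.
    from sklearn.datasets import load_digits
    from sklearn.model_selection import cross_val_score

    from myensemble import base_learner

    X, y = load_digits(return_X_y=True)

    # get_params/set_params make the ensemble clonable, so utilities such as
    # cross_val_score can refit it on each fold like a regular estimator.
    scores = cross_val_score(base_learner, X, y, cv=5, scoring='accuracy')
    print(scores.mean())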
diff --git a/xcessiv/models.py b/xcessiv/models.py
index 09489b3..77a1fab 100644
--- a/xcessiv/models.py
+++ b/xcessiv/models.py
@@ -1,5 +1,7 @@
 """This module contains the SQLAlchemy ORM Models"""
 from __future__ import absolute_import, print_function, division, unicode_literals
+import random
+import string
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy import Column, Text, Integer, Boolean, TypeDecorator, ForeignKey, Table
 from sqlalchemy.orm import relationship
@@ -391,6 +393,102 @@ def return_secondary_learner(self):
         estimator = estimator.set_params(**self.secondary_learner_hyperparameters)
         return estimator
 
+    def export_as_code(self, cv_source):
+        """Returns a string value that contains the Python code for the ensemble
+
+        Args:
+            cv_source (str, unicode): String containing actual code for base learner
+                cross-validation used to generate secondary meta-features.
+
+        Returns:
+            base_learner_code (str, unicode): String that can be used as Python code
+        """
+
+        rand_value = ''.join(random.choice(string.ascii_uppercase + string.digits)
+                             for _ in range(25))
+
+        base_learner_code = ''
+        base_learner_code += 'base_learner_list_{} = []\n'.format(rand_value)
+        base_learner_code += 'meta_feature_generators_list_{} = []\n\n'.format(rand_value)
+        for idx, base_learner in enumerate(self.base_learners):
+            base_learner_code += '################################################\n'
+            base_learner_code += '###### Code for building base learner {} ########\n'.format(idx+1)
+            base_learner_code += '################################################\n'
+            base_learner_code += base_learner.base_learner_origin.source
+            base_learner_code += '\n\n'
+            base_learner_code += 'base_learner' \
+                                 '.set_params(**{})\n'.format(base_learner.hyperparameters)
+            base_learner_code += 'base_learner_list_{}.append(base_learner)\n'.format(rand_value)
+            base_learner_code += 'meta_feature_generators_list_{}.append("{}")\n'.format(
+                rand_value,
+                base_learner.base_learner_origin.meta_feature_generator
+            )
+            base_learner_code += '\n\n'
+
+        base_learner_code += '################################################\n'
+        base_learner_code += '##### Code for building secondary learner ######\n'
+        base_learner_code += '################################################\n'
+        base_learner_code += self.base_learner_origin.source
+        base_learner_code += '\n\n'
+        base_learner_code += 'base_learner' \
+                             '.set_params(**{})\n'.format(self.secondary_learner_hyperparameters)
+        base_learner_code += 'secondary_learner_{} = base_learner\n'.format(rand_value)
+        base_learner_code += '\n\n'
+
+        base_learner_code += '################################################\n'
+        base_learner_code += '############## Code for CV method ##############\n'
+        base_learner_code += '################################################\n'
+        base_learner_code += cv_source
+        base_learner_code += '\n\n'
+
+        base_learner_code += '################################################\n'
+        base_learner_code += '######## Code for Xcessiv stacker class ########\n'
+        base_learner_code += '################################################\n'
+        stacker_file_loc = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'stacker.py')
+        with open(stacker_file_loc) as f2:
+            base_learner_code += f2.read()
+
+        base_learner_code += '\n\n' \
+                             '    def {}(self, X):\n' \
+                             '        return self._process_using_' \
+                             'meta_feature_generator(X, "{}")\n\n'\
+            .format(self.base_learner_origin.meta_feature_generator,
+                    self.base_learner_origin.meta_feature_generator)
+
+        base_learner_code += '\n\n'
+
+        base_learner_code += 'base_learner = XcessivStackedEnsemble' \
+                             '(base_learners=base_learner_list_{},' \
+                             ' meta_feature_generators=meta_feature_generators_list_{},' \
+                             ' secondary_learner=secondary_learner_{},' \
+                             ' cv_function=return_splits_iterable,' \
+                             ' append_original={})\n'.format(
+            rand_value,
+            rand_value,
+            rand_value,
+            self.append_original
+        )
+
+        return base_learner_code
+
+    def export_as_file(self, file_path, cv_source):
+        """Export the ensemble as a single Python file and saves it to `file_path`.
+
+        This is EXPERIMENTAL as putting different modules together would probably wreak havoc
+        especially on modules that make heavy use of global variables.
+
+        Args:
+            file_path (str, unicode): Absolute/local path of place to save file in
+
+            cv_source (str, unicode): String containing actual code for base learner
+                cross-validation used to generate secondary meta-features.
+        """
+        if os.path.exists(file_path):
+            raise exceptions.UserError('{} already exists'.format(file_path))
+
+        with open(file_path, 'wb') as f:
+            f.write(self.export_as_code(cv_source).encode('utf8'))
+
     def export_as_package(self, package_path, cv_source):
         """Exports the ensemble as a Python package and saves it to `package_path`.
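For orientation, here is a rough sketch of how these two new methods are meant to be driven; it mirrors the pattern used by the Flask views in ``xcessiv/views.py`` further down. The project folder name and ensemble id are made-up placeholders, not values from this patch::

    # Sketch: calling the new export methods directly against a project database,
    # mirroring what the views below do. 'my_project' and id=1 are assumptions.
    import os
    from xcessiv import functions, models

    path = 'my_project'  # Xcessiv project folder, as passed in the ?path= query string

    with functions.DBContextManager(path) as session:
        stacked_ensemble = session.query(models.StackedEnsemble).filter_by(id=1).first()
        extraction = session.query(models.Extraction).first()
        cv_source = extraction.meta_feature_generation['source']

        # Write the standalone file (raises if the file already exists)...
        stacked_ensemble.export_as_file(os.path.join(path, 'myensemble.py'), cv_source)

        # ...or just get the generated source as a string, which is what the new
        # "export as separate base learner setup" flow stores as a base learner origin.
        source = stacked_ensemble.export_as_code(cv_source)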
diff --git a/xcessiv/stacker.py b/xcessiv/stacker.py
index 0ae3390..2d80095 100644
--- a/xcessiv/stacker.py
+++ b/xcessiv/stacker.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, print_function, division, unicode_literals
 from sklearn.pipeline import _BasePipeline
 import numpy as np
 
diff --git a/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js b/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js
index e0359c2..277c93f 100644
--- a/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js
+++ b/xcessiv/ui/src/Ensemble/EnsembleMoreDetailsModal.js
@@ -113,11 +113,6 @@ export class ExportModal extends Component {
     };
   }
 
-  handleYesAndClose() {
-    this.props.handleYes(this.state.name);
-    this.props.onRequestClose();
-  }
-
   render() {
     return (
@@ -126,7 +121,7 @@ export class ExportModal extends Component {
        onHide={this.props.onRequestClose}
      >
-          Export ensemble as Python package
+          Export ensemble as Python file
@@ -136,7 +131,7 @@ export class ExportModal extends Component {
-          Name to use as package name
+          Name to use as filename
           this.setState({name: evt.target.value})}
@@ -145,8 +140,17 @@ export class ExportModal extends Component {
-
+
diff --git a/xcessiv/ui/src/Ensemble/ListEnsemble.js b/xcessiv/ui/src/Ensemble/ListEnsemble.js
index fe9c12f..c506e41 100644
--- a/xcessiv/ui/src/Ensemble/ListEnsemble.js
+++ b/xcessiv/ui/src/Ensemble/ListEnsemble.js
@@ -198,6 +198,7 @@ class ListEnsemble extends Component {
   // Export an ensemble
   exportEnsemble(id, name) {
     var payload = {name};
+    payload.type = 'file';
     fetch(
       '/ensemble/stacked/' + id + '/export/?path=' + this.props.path,
@@ -371,7 +372,8 @@ class ListEnsemble extends Component {
           this.setState({idToExport: null})}
-          handleYes={(name) => this.exportEnsemble(this.state.idToExport, name)}
+          exportEnsemble={(name) => this.exportEnsemble(this.state.idToExport, name)}
+          exportEnsembleToBaseLearnerOrigin={() => this.props.exportEnsembleToBaseLearnerOrigin(this.state.idToExport)}
        />
      )
diff --git a/xcessiv/ui/src/containers/ContainerBaseLearner.js b/xcessiv/ui/src/containers/ContainerBaseLearner.js
index 007a7af..b93ab69 100644
--- a/xcessiv/ui/src/containers/ContainerBaseLearner.js
+++ b/xcessiv/ui/src/containers/ContainerBaseLearner.js
@@ -613,6 +613,46 @@ class ContainerBaseLearner extends Component {
     });
   }
 
+  // Export an ensemble
+  exportEnsembleToBaseLearnerOrigin(id) {
+    var payload = {};
+
+    fetch(
+      '/ensemble/stacked/' + id + '/export-new-blo/?path=' + this.props.path,
+      {
+        method: "POST",
+        body: JSON.stringify( payload ),
+        headers: new Headers({
+          'Content-Type': 'application/json'
+        })
+      }
+    )
+    .then(handleErrors)
+    .then(response => response.json())
+    .then(json => {
+      console.log(json);
+      this.setState((prevState) => {
+        var baseLearnerOrigins = prevState.baseLearnerOrigins.slice();
+        baseLearnerOrigins.push(json);
+        return {baseLearnerOrigins};
+      });
+      this.props.addNotification({
+        title: 'Success',
+        message: 'Exported ensemble as new base learner type',
+        level: 'success'
+      });
+    })
+    .catch(error => {
+      console.log(error.message);
+      console.log(error.errMessage);
+      this.props.addNotification({
+        title: error.message,
+        message: error.errMessage,
+        level: 'error'
+      });
+    });
+  }
+
   render() {
     const checkedOptions = this.state.checkedBaseLearners.toJS().map((val) => {
       return {
@@ -683,6 +723,7 @@ class ContainerBaseLearner extends Component {
           addNotification={(notif) => this.props.addNotification(notif)}
           stackedEnsembles={this.state.stackedEnsembles}
           deleteStackedEnsemble={(id) => this.deleteStackedEnsemble(id)}
+          exportEnsembleToBaseLearnerOrigin={(id) => this.exportEnsembleToBaseLearnerOrigin(id)}
        />
      )
diff --git a/xcessiv/views.py b/xcessiv/views.py
index 8995a17..d695f15 100644
--- a/xcessiv/views.py
+++ b/xcessiv/views.py
@@ -600,9 +600,40 @@ def export_stacked_ensemble(id):
     if request.method == 'POST':
         req_body = request.get_json()
-        stacked_ensemble.export_as_package(os.path.join(path, req_body['name']),
-                                           extraction.meta_feature_generation['source'])
+        if req_body['type'] == 'package':
+            stacked_ensemble.export_as_package(os.path.join(path, req_body['name']),
+                                               extraction.meta_feature_generation['source'])
+        elif req_body['type'] == 'file':
+            if not req_body['name'].endswith('.py'):
+                req_body['name'] += '.py'
+            stacked_ensemble.export_as_file(os.path.join(path, req_body['name']),
+                                            extraction.meta_feature_generation['source'])
         return jsonify(message='Stacked ensemble successfully '
-                               'exported as package {} in {}'.format(
+                               'exported as {} in {}'.format(
             req_body['name'],
             path
         ))
+
+
+@app.route('/ensemble/stacked/<int:id>/export-new-blo/', methods=['POST'])
+def export_stacked_ensemble_as_base_learner_origin(id):
+    path = functions.get_path_from_query_string(request)
+
+    with functions.DBContextManager(path) as session:
+        stacked_ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
+        if stacked_ensemble is None:
+            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)
+
+        extraction = session.query(models.Extraction).first()
+
+    if request.method == 'POST':
+        source = stacked_ensemble.export_as_code(extraction.meta_feature_generation['source'])
+
+        new_base_learner_origin = models.BaseLearnerOrigin(
+            source=source,
+            name='Xcessiv Ensemble',
+            meta_feature_generator=stacked_ensemble.base_learner_origin.meta_feature_generator
+        )
+
+        session.add(new_base_learner_origin)
+        session.commit()
+        return jsonify(new_base_learner_origin.serialize)
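Putting the front-end and back-end changes together, the two export flows correspond to two POST endpoints. A minimal sketch of calling them directly with ``requests``; the server address, project folder, and ensemble id are placeholders, not values from this patch::

    # Sketch: exercising the export endpoints over HTTP. Adjust host/port, path,
    # and the ensemble id to match your running Xcessiv server and project.
    import requests

    base = 'http://localhost:1994'
    params = {'path': 'my_project'}

    # Export ensemble 1 as a standalone Python file; the view appends '.py' if missing.
    requests.post(base + '/ensemble/stacked/1/export/',
                  params=params, json={'name': 'myensemble', 'type': 'file'})

    # Export ensemble 1 as a new base learner origin ("separate base learner setup").
    requests.post(base + '/ensemble/stacked/1/export-new-blo/',
                  params=params, json={})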