Merge pull request #27 from goeckslab/v0.8.0
upgrade to v0.8.0
qiagu committed Dec 13, 2019
2 parents a65c3af + d6c6607 commit 95f2c7d
Showing 74 changed files with 747 additions and 482 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -61,7 +61,7 @@ jobs:
command: |
. ~/venv/bin/activate
cd galaxy_ml/tools
TESTFILES=$(ls *[!macros].xml | circleci tests split)
TESTFILES=$(ls -I "*macros.xml" | grep .xml | circleci tests split)
planemo test ${TESTFILES}
workflows:
version: 2
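An aside on the command change above (not part of the commit): the old glob `*[!macros].xml` does not mean "everything except the macros files" — the bracket is a single-character class, so any tool XML whose name happens to end in one of the letters m/a/c/r/o/s would be silently dropped from the test split. A minimal Python sketch of the two selection rules, using illustrative file names rather than the repository's actual tool list:

```python
# Sketch comparing the old and new test-file selection rules.
# File names are illustrative, not the repository's actual tool list.
import fnmatch

files = ["main_macros.xml", "keras_macros.xml", "searchcv.xml", "pca.xml"]

# Old rule: "*[!macros].xml" matches names whose last character before ".xml"
# is NOT one of m/a/c/r/o/s -- so "pca.xml" (ends in 'a') is wrongly skipped.
old = [f for f in files if fnmatch.fnmatch(f, "*[!macros].xml")]

# New rule: keep every .xml file except those matching "*macros.xml",
# mirroring `ls -I "*macros.xml" | grep .xml`.
new = [f for f in files
       if f.endswith(".xml") and not fnmatch.fnmatch(f, "*macros.xml")]

print(old)  # ['searchcv.xml']
print(new)  # ['searchcv.xml', 'pca.xml']
```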
5 changes: 4 additions & 1 deletion docs/CHANGELOG.md
@@ -1,4 +1,4 @@
### Version 0.8.0.dev (09-18-2019)
### Version 0.8.0 (12-10-2019)

#### New Features

@@ -11,6 +11,9 @@
- Adds keras training and evaluation tool.
- Adds `decision_function` support for binarize target classifiers.
- Adds matplotlib svg format option in `ml_visualization_ex` tool.
- Adds `sklearn.ensemble.HistGradientBoostingClassifier` and `sklearn.ensemble.HistGradientBoostingRegressor` (a usage sketch follows this changelog excerpt).
- Adds new regression scorer `max_error`.
- Upgrades scikit-learn to v0.21.3, mlxtend to v0.17.0, imbalanced-learn to v0.5.0, keras to v2.3.1 and tensorflow to v1.15.0.

#### Changes

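As referenced in the feature list above, here is a brief usage sketch (not part of the commit) for the two histogram-based estimators and the `max_error` scorer, assuming scikit-learn 0.21.x, where these estimators are still experimental and need an explicit enable import:

```python
# Sketch: HistGradientBoosting* and the max_error metric under scikit-learn 0.21.x.
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
from sklearn.ensemble import (HistGradientBoostingClassifier,
                              HistGradientBoostingRegressor)
from sklearn.datasets import load_boston, load_breast_cancer
from sklearn.metrics import max_error
from sklearn.model_selection import train_test_split

# Classification example.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = HistGradientBoostingClassifier(max_iter=100, random_state=0)
print("accuracy:", clf.fit(X_train, y_train).score(X_test, y_test))

# Regression example, scored with the new max_error metric.
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
reg = HistGradientBoostingRegressor(random_state=0).fit(X_train, y_train)
print("max_error:", max_error(y_test, reg.predict(X_test)))
```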
2 changes: 1 addition & 1 deletion galaxy_ml/__init__.py
@@ -1 +1 @@
__version__ = '0.8.0.dev'
__version__ = '0.8.0'
2 changes: 1 addition & 1 deletion galaxy_ml/binarize_target/_iraps_classifier.py
@@ -21,11 +21,11 @@ class _BinarizeTargetProbaScorer
import six

from abc import ABCMeta
from joblib import Parallel, delayed
from scipy.stats import ttest_ind
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.feature_selection.univariate_selection import _BaseFilter
from sklearn.utils import as_float_array, check_X_y, check_random_state
from sklearn.utils._joblib import Parallel, delayed
from sklearn.utils.validation import (check_array, check_is_fitted,
column_or_1d, check_memory)

2 changes: 1 addition & 1 deletion galaxy_ml/feature_selectors.py
@@ -5,6 +5,7 @@
"""
import numpy as np

from joblib import Parallel, delayed, effective_n_jobs
from imblearn import under_sampling, over_sampling, combine
from imblearn.pipeline import Pipeline as imbPipeline
from sklearn import (cluster, compose, decomposition, ensemble,
@@ -20,7 +21,6 @@
from sklearn.model_selection import check_cv
from sklearn.metrics.scorer import check_scoring
from sklearn.utils import check_X_y, safe_indexing, safe_sqr
from sklearn.utils._joblib import Parallel, delayed, effective_n_jobs


__all__ = ('DyRFE', 'DyRFECV', '_MyPipeline', '_MyimbPipeline',
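The import hunks above (and the matching one in `_iraps_classifier.py`) only change where `Parallel`, `delayed` and `effective_n_jobs` come from: the vendored `sklearn.utils._joblib` shim is dropped in favor of the standalone `joblib` package. The calling pattern is unchanged; a minimal sketch with the standalone package, using a stand-in worker function:

```python
# Sketch of the joblib fan-out pattern these modules rely on, shown with the
# standalone joblib package that this commit switches to.
from joblib import Parallel, delayed, effective_n_jobs


def fit_one(step):
    # Stand-in for per-step work such as fitting one RFE iteration.
    return step * step


n_jobs = effective_n_jobs(2)  # resolves -1/None to a concrete worker count
results = Parallel(n_jobs=n_jobs)(delayed(fit_one)(i) for i in range(8))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49]
```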
75 changes: 25 additions & 50 deletions galaxy_ml/keras_galaxy_models.py
@@ -369,8 +369,6 @@ class BaseKerasModel(six.with_metaclass(ABCMeta, BaseEstimator)):
optimizer parameter, default change with `optimizer`
rho : None or float
optimizer parameter, default change with `optimizer`
epsilon : None or float
optimizer parameter, default change with `optimizer`
amsgrad : None or bool
for optimizer `adam` only, ignored otherwise
beta_1 : None or float
@@ -408,8 +406,8 @@ class BaseKerasModel(six.with_metaclass(ABCMeta, BaseEstimator)):
def __init__(self, config, model_type='sequential',
optimizer='sgd', loss='binary_crossentropy',
metrics=[], lr=None, momentum=None, decay=None,
nesterov=None, rho=None, epsilon=None, amsgrad=None,
beta_1=None, beta_2=None, schedule_decay=None, epochs=1,
nesterov=None, rho=None, amsgrad=None, beta_1=None,
beta_2=None, schedule_decay=None, epochs=1,
batch_size=None, seed=None, callbacks=None,
validation_data=None, steps_per_epoch=None,
validation_steps=None, verbose=0, **fit_params):
@@ -440,40 +438,34 @@ def __init__(self, config, model_type='sequential',
elif self.optimizer == 'rmsprop':
self.lr = 0.001 if lr is None else lr
self.rho = 0.9 if rho is None else rho
self.epsilon = None if epsilon is None else epsilon
self.decay = 0 if decay is None else decay

elif self.optimizer == 'adagrad':
self.lr = 0.01 if lr is None else lr
self.epsilon = None if epsilon is None else epsilon
self.decay = 0 if decay is None else decay

elif self.optimizer == 'adadelta':
self.lr = 1.0 if lr is None else lr
self.rho = 0.95 if rho is None else rho
self.epsilon = None if epsilon is None else epsilon
self.decay = 0 if decay is None else decay

elif self.optimizer == 'adam':
self.lr = 0.001 if lr is None else lr
self.beta_1 = 0.9 if beta_1 is None else beta_1
self.beta_2 = 0.999 if beta_2 is None else beta_2
self.epsilon = None if epsilon is None else epsilon
self.decay = 0 if decay is None else decay
self.amsgrad = False if amsgrad is None else amsgrad

elif self.optimizer == 'adamax':
self.lr = 0.002 if lr is None else lr
self.beta_1 = 0.9 if beta_1 is None else beta_1
self.beta_2 = 0.999 if beta_2 is None else beta_2
self.epsilon = None if epsilon is None else epsilon
self.decay = 0 if decay is None else decay

elif self.optimizer == 'nadam':
self.lr = 0.002 if lr is None else lr
self.beta_1 = 0.9 if beta_1 is None else beta_1
self.beta_2 = 0.999 if beta_2 is None else beta_2
self.epsilon = None if epsilon is None else epsilon
self.schedule_decay = 0.004 if schedule_decay is None\
else schedule_decay

@@ -495,30 +487,21 @@ def _optimizer(self):
elif self.optimizer == 'rmsprop':
if not hasattr(self, 'rho'):
self.rho = 0.9
if not hasattr(self, 'epsilon'):
self.epsilon = None
if not hasattr(self, 'decay'):
self.decay = 0
return RMSprop(lr=self.lr, rho=self.rho,
epsilon=self.epsilon, decay=self.decay)
return RMSprop(lr=self.lr, rho=self.rho, decay=self.decay)

elif self.optimizer == 'adagrad':
if not hasattr(self, 'epsilon'):
self.epsilon = None
if not hasattr(self, 'decay'):
self.decay = 0
return Adagrad(lr=self.lr, epsilon=self.epsilon,
decay=self.decay)
return Adagrad(lr=self.lr, decay=self.decay)

elif self.optimizer == 'adadelta':
if not hasattr(self, 'rho'):
self.rho = 0.95
if not hasattr(self, 'epsilon'):
self.epsilon = None
if not hasattr(self, 'decay'):
self.decay = 0
return Adadelta(lr=self.lr, rho=self.rho,
epsilon=self.epsilon,
decay=self.decay)

elif self.optimizer == 'adam':
@@ -528,13 +511,11 @@ def _optimizer(self):
self.beta_2 = 0.999
if not hasattr(self, 'decay'):
self.decay = 0
if not hasattr(self, 'epsilon'):
self.epsilon = None
if not hasattr(self, 'amsgrad'):
self.amsgrad = False
return Adam(lr=self.lr, beta_1=self.beta_1,
beta_2=self.beta_2, epsilon=self.epsilon,
decay=self.decay, amsgrad=self.amsgrad)
beta_2=self.beta_2, decay=self.decay,
amsgrad=self.amsgrad)

elif self.optimizer == 'adamax':
if not hasattr(self, 'beta_1'):
@@ -543,23 +524,19 @@ def _optimizer(self):
self.beta_2 = 0.999
if not hasattr(self, 'decay'):
self.decay = 0
if not hasattr(self, 'epsilon'):
self.epsilon = None
return Adamax(lr=self.lr, beta_1=self.beta_1,
beta_2=self.beta_2, epsilon=self.epsilon,
beta_2=self.beta_2,
decay=self.decay)

elif self.optimizer == 'nadam':
if not hasattr(self, 'beta_1'):
self.beta_1 = 0.9
if not hasattr(self, 'beta_2'):
self.beta_2 = 0.999
if not hasattr(self, 'epsilon'):
self.epsilon = None
if not hasattr(self, 'schedule_decay'):
self.schedule_decay = 0.004
return Nadam(lr=self.lr, beta_1=self.beta_1,
beta_2=self.beta_2, epsilon=self.epsilon,
beta_2=self.beta_2,
schedule_decay=self.schedule_decay)

@property
@@ -594,7 +571,7 @@ def _callbacks(self):

curr_dir = __import__('os').getcwd()

if callback_type == 'None':
if callback_type in ('None', ''):
continue
elif callback_type == 'ModelCheckpoint':
if not params.get('filepath', None):
@@ -628,17 +605,18 @@ def _fit(self, X, y, **kwargs):
if self.seed is not None:
np.random.seed(self.seed)
random.seed(self.seed)
tf.set_random_seed(self.seed)
tf.compat.v1.set_random_seed(self.seed)
intra_op = 1
inter_op = 1

session_conf = tf.ConfigProto(
session_conf = tf.compat.v1.ConfigProto(
intra_op_parallelism_threads=intra_op,
inter_op_parallelism_threads=inter_op,
log_device_placement=bool(self.verbose))

sess = tf.Session(graph=tf.get_default_graph(),
config=session_conf)
sess = tf.compat.v1.Session(
graph=tf.compat.v1.get_default_graph(),
config=session_conf)
K.set_session(sess)

config = self.config
@@ -976,8 +954,6 @@ class KerasGBatchClassifier(KerasGClassifier):
optimizer parameter, default change with `optimizer`
rho : None or float
optimizer parameter, default change with `optimizer`
epsilon : None or float
optimizer parameter, default change with `optimizer`
amsgrad : None or bool
for optimizer `adam` only, ignored otherwise
beta_1 : None or float
@@ -1025,18 +1001,17 @@ def __init__(self, config, data_batch_generator,
model_type='sequential', optimizer='sgd',
loss='binary_crossentropy', metrics=[], lr=None,
momentum=None, decay=None, nesterov=None, rho=None,
epsilon=None, amsgrad=None, beta_1=None,
beta_2=None, schedule_decay=None, epochs=1,
batch_size=None, seed=None, n_jobs=1,
callbacks=None, validation_data=None,
steps_per_epoch=None, validation_steps=None,
verbose=0, prediction_steps=None,
class_positive_factor=1,
amsgrad=None, beta_1=None, beta_2=None,
schedule_decay=None, epochs=1, batch_size=None,
seed=None, n_jobs=1, callbacks=None,
validation_data=None, steps_per_epoch=None,
validation_steps=None, verbose=0,
prediction_steps=None, class_positive_factor=1,
**fit_params):
super(KerasGBatchClassifier, self).__init__(
config, model_type=model_type, optimizer=optimizer,
loss=loss, metrics=metrics, lr=lr, momentum=momentum,
decay=decay, nesterov=nesterov, rho=rho, epsilon=epsilon,
decay=decay, nesterov=nesterov, rho=rho,
amsgrad=amsgrad, beta_1=beta_1, beta_2=beta_2,
schedule_decay=schedule_decay, epochs=epochs,
batch_size=batch_size, seed=seed, callbacks=callbacks,
@@ -1060,17 +1035,17 @@ def fit(self, X, y=None, class_weight=None, sample_weight=None, **kwargs):
if self.seed is not None:
np.random.seed(self.seed)
random.seed(self.seed)
tf.set_random_seed(self.seed)
tf.compat.v1.set_random_seed(self.seed)
intra_op = 1
inter_op = 1

session_conf = tf.ConfigProto(
session_conf = tf.compat.v1.ConfigProto(
intra_op_parallelism_threads=intra_op,
inter_op_parallelism_threads=inter_op,
log_device_placement=bool(self.verbose))

sess = tf.Session(graph=tf.get_default_graph(),
config=session_conf)
sess = tf.compat.v1.Session(
graph=tf.compat.v1.get_default_graph(), config=session_conf)
K.set_session(sess)

check_params(kwargs, Model.fit_generator)
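The recurring seeding/session block that the hunks above migrate to the `tf.compat.v1` namespace is self-contained enough to show on its own. A sketch of that pattern, assuming TensorFlow 1.15 with standalone Keras 2.3.x (the versions this release pins); the helper name is illustrative, not part of the commit:

```python
# Sketch: deterministic, single-threaded TF session setup via tf.compat.v1,
# the namespace that works on TF 1.15 and remains available in TF 2.x.
import random

import numpy as np
import tensorflow as tf
from keras import backend as K


def make_deterministic_session(seed, verbose=False):
    """Seed Python/NumPy/TF and bind a single-threaded session to Keras."""
    np.random.seed(seed)
    random.seed(seed)
    tf.compat.v1.set_random_seed(seed)
    session_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
        log_device_placement=bool(verbose))
    sess = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(), config=session_conf)
    K.set_session(sess)  # standalone Keras with the TF backend
    return sess
```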
8 changes: 8 additions & 0 deletions galaxy_ml/model_validations.py
@@ -376,6 +376,14 @@ def _fit_and_score(estimator, X, y, scorer, train, test, verbose,
score_time = 0.0
if error_score == 'raise':
raise
elif error_score == 'raise-deprecating':
warnings.warn("From version 0.22, errors during fit will result "
"in a cross validation score of NaN by default. Use "
"error_score='raise' if you want an exception "
"raised or error_score=np.nan to adopt the "
"behavior from version 0.22.",
FutureWarning)
raise
elif isinstance(error_score, numbers.Number):
if is_multimetric:
test_scores = dict(zip(scorer.keys(),
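The new branch above mirrors scikit-learn's handling of the deprecated `error_score='raise-deprecating'` default: `'raise'` re-raises, `'raise-deprecating'` warns and re-raises, and a numeric value is recorded as the fold's score. A sketch of the observable behavior, using scikit-learn's own `cross_val_score` and a deliberately failing estimator (both illustrative, not part of the commit):

```python
# Sketch: error_score semantics when a fit raises. With a numeric error_score
# such as np.nan, the failed fold's score becomes that number instead of
# aborting the whole cross-validation run.
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_val_score


class AlwaysFails(BaseEstimator, ClassifierMixin):
    """Toy estimator whose fit always raises, to trigger error_score handling."""

    def fit(self, X, y):
        raise ValueError("simulated fit failure")

    def predict(self, X):
        return np.zeros(len(X))


X = np.arange(20).reshape(10, 2)
y = np.array([0, 1] * 5)

scores = cross_val_score(AlwaysFails(), X, y, cv=2, error_score=np.nan)
print(scores)  # [nan nan] -- each failed fold gets NaN, with a FitFailedWarning
```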
2 changes: 2 additions & 0 deletions galaxy_ml/pk_whitelist.json
@@ -151,6 +151,8 @@
"sklearn.ensemble.voting_classifier._parallel_fit_estimator", "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
"sklearn.ensemble.weight_boosting.AdaBoostRegressor", "sklearn.ensemble.weight_boosting.BaseWeightBoosting",
"sklearn.ensemble.weight_boosting._samme_proba", "sklearn.ensemble.weight_boosting.inner1d",
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor",
"sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingClassifier",
"sklearn.feature_extraction.DictVectorizer", "sklearn.feature_extraction.FeatureHasher",
"sklearn.feature_extraction._hashing.transform", "sklearn.feature_extraction.dict_vectorizer.DictVectorizer",
"sklearn.feature_extraction.dict_vectorizer._tosequence", "sklearn.feature_extraction.grid_to_graph",
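The two entries added above extend the class whitelist consulted when pickled models are loaded safely. A generic sketch of that restricted-unpickling pattern follows; the `ALLOWED` set and the loader are illustrative stand-ins, not the project's actual implementation:

```python
# Sketch: only fully qualified names present in a whitelist may be resolved
# during unpickling; anything else is rejected before it is imported.
import importlib
import io
import pickle

ALLOWED = {
    "sklearn.ensemble._hist_gradient_boosting.gradient_boosting."
    "HistGradientBoostingClassifier",
    "numpy.core.multiarray._reconstruct",
}


class WhitelistUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        full_name = "%s.%s" % (module, name)
        if full_name not in ALLOWED:
            raise pickle.UnpicklingError(
                "global '%s' is not in the whitelist" % full_name)
        return getattr(importlib.import_module(module), name)


def safe_loads(data):
    """Unpickle bytes, resolving only whitelisted globals."""
    return WhitelistUnpickler(io.BytesIO(data)).load()
```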
(Diff for the remaining changed files is not shown in this excerpt.)