Merge pull request #54 from JuliaAI/dev
For a 0.3.0 release
ablaom authored Feb 28, 2023
2 parents 16c7814 + 9d9f4dc commit 075403e
Showing 9 changed files with 158 additions and 56 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/ci.yml
@@ -2,8 +2,6 @@ name: CI

env:
PYTHON: Conda
# remove next line (and others marked below) when #42 properly resolved
LD_LIBRARY_PATH: /home/runner/.julia/conda/3/lib

on:
pull_request:
@@ -46,12 +44,14 @@ jobs:
${{ runner.os }}-test-
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
# remove next four lines (and one other marked above) when #42 properly resolved
- name: "Install Conda"
run: julia -e 'using Pkg; Pkg.add("Conda");'
- name: "Install Scikit-learn"
run: julia -e 'using Conda; Conda.add("scikit-learn");'
# The following is needed for Julia <= 1.8.3 on Linux OS
# due to the old version of libstdc++ bundled with Julia
- name: "Export LD_LIBRARY_PATH environment variable"
if: ${{matrix.version == '1.6'}}
run: echo "LD_LIBRARY_PATH=/home/runner/.julia/conda/3/x86_64/lib" >> $GITHUB_ENV
- uses: julia-actions/julia-runtest@v1
env:
LD_LIBRARY_PATH: /home/runner/.julia/conda/3/x86_64/lib
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v1
with:
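The removed steps had installed python's scikit-learn through Conda.jl before the tests ran. A rough local equivalent of those steps, assuming Conda.jl's standard API (not part of this commit):

    using Pkg
    Pkg.add("Conda")            # Julia wrapper around a private conda installation
    using Conda
    Conda.add("scikit-learn")   # installs python's scikit-learn into ~/.julia/conda/3

The LD_LIBRARY_PATH export that replaces them points Julia 1.6 jobs at Conda's newer libstdc++, since the copy bundled with older Julia builds is too old for recent conda packages.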
7 changes: 4 additions & 3 deletions Project.toml
@@ -1,7 +1,7 @@
name = "MLJScikitLearnInterface"
uuid = "5ae90465-5518-4432-b9d2-8a1def2f0cab"
authors = ["Thibaut Lienart, Anthony Blaom"]
version = "0.2.0"
version = "0.3.0"

[deps]
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
@@ -11,13 +11,14 @@ ScikitLearn = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
[compat]
MLJModelInterface = "1.4"
PyCall = "1"
ScikitLearn = "0.5,0.6"
ScikitLearn = "0.7"
julia = "1.6"

[extras]
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["StableRNGs", "Test", "MLJBase"]
test = ["StableRNGs", "MLJTestInterface", "Test", "MLJBase"]
3 changes: 2 additions & 1 deletion src/models/clustering.jl
@@ -186,7 +186,8 @@ const KMeans_ = skcl(:KMeans)
verbose::Int = 0::(_ ≥ 0)
random_state::Any = nothing
copy_x::Bool = true
algorithm::String = "auto"::(_ in ("auto", "full", "elkane"))
## TODO: Remove the "auto" and "full" options when python sklearn releases v1.3
algorithm::String = "lloyd"::(_ in ("auto", "full", "elkane", "lloyd"))
# long
init::Union{AbstractArray,String} = "k-means++"::(_ isa AbstractArray || _ in ("k-means++", "random"))
end
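Since "lloyd" is what the deprecated "auto"/"full" options resolve to in recent sklearn, existing code that never set `algorithm` behaves the same. A usage sketch, assuming the MLJ registry name and an `n_clusters` field as in sklearn:

    using MLJ
    KMeans = @load KMeans pkg=MLJScikitLearnInterface
    X, _ = make_blobs(100, 2)                         # synthetic clustering data
    mach = machine(KMeans(n_clusters=3), X) |> fit!   # algorithm = "lloyd" by default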
44 changes: 25 additions & 19 deletions src/models/ensemble.jl
@@ -1,6 +1,6 @@
const AdaBoostRegressor_ = sken(:AdaBoostRegressor)
@sk_reg mutable struct AdaBoostRegressor <: MMI.Deterministic
base_estimator::Any = nothing
estimator::Any = nothing
n_estimators::Int = 50::(_ > 0)
learning_rate::Float64 = 1.0::(_ > 0)
loss::String = "linear"::(_ in ("linear","square","exponential"))
@@ -17,7 +17,7 @@ add_human_name_trait(AdaBoostRegressor, "AdaBoost ensemble regression")
# ----------------------------------------------------------------------------
const AdaBoostClassifier_ = sken(:AdaBoostClassifier)
@sk_clf mutable struct AdaBoostClassifier <: MMI.Probabilistic
base_estimator::Any = nothing
estimator::Any = nothing
n_estimators::Int = 50::(_ > 0)
learning_rate::Float64 = 1.0::(_ > 0)
algorithm::String = "SAMME.R"::(_ in ("SAMME", "SAMME.R"))
@@ -39,7 +39,7 @@ meta(AdaBoostClassifier,
# ============================================================================
const BaggingRegressor_ = sken(:BaggingRegressor)
@sk_reg mutable struct BaggingRegressor <: MMI.Deterministic
base_estimator::Any = nothing
estimator::Any = nothing
n_estimators::Int = 10::(_>0)
max_samples::Union{Int,Float64} = 1.0::(_>0)
max_features::Union{Int,Float64} = 1.0::(_>0)
@@ -63,7 +63,7 @@ add_human_name_trait(BaggingRegressor, "bagging ensemble regressor")
# ----------------------------------------------------------------------------
const BaggingClassifier_ = sken(:BaggingClassifier)
@sk_clf mutable struct BaggingClassifier <: MMI.Probabilistic
base_estimator::Any = nothing
estimator::Any = nothing
n_estimators::Int = 10::(_>0)
max_samples::Union{Int,Float64} = 1.0::(_>0)
max_features::Union{Int,Float64} = 1.0::(_>0)
@@ -95,11 +95,11 @@ meta(BaggingClassifier,
# ============================================================================
const GradientBoostingRegressor_ = sken(:GradientBoostingRegressor)
@sk_reg mutable struct GradientBoostingRegressor <: MMI.Deterministic
loss::String = "ls"::(_ in ("ls","lad","huber","quantile"))
loss::String = "squared_error"::(_ in ("squared_error","absolute_error","huber","quantile"))
learning_rate::Float64 = 0.1::(_>0)
n_estimators::Int = 100::(_>0)
subsample::Float64 = 1.0::(_>0)
criterion::String = "friedman_mse"::(_ in ("mse","mae","friedman_mse"))
criterion::String = "friedman_mse"::(_ in ("squared_error","friedman_mse"))
min_samples_split::Union{Int,Float64} = 2::(_>0)
min_samples_leaf::Union{Int,Float64} = 1::(_>0)
min_weight_fraction_leaf::Float64 = 0.0::(_≥0)
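The regressor's loss and criterion names now follow the sklearn 1.x spellings. A construction sketch with the new defaults:

    using MLJ
    GBR = @load GradientBoostingRegressor pkg=MLJScikitLearnInterface
    model = GBR(loss="squared_error", criterion="friedman_mse")   # was "ls"/"mse" in 0.2.0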
@@ -130,7 +130,8 @@ add_human_name_trait(GradientBoostingRegressor, "gradient boosting ensemble regressor")
# ----------------------------------------------------------------------------
const GradientBoostingClassifier_ = sken(:GradientBoostingClassifier)
@sk_clf mutable struct GradientBoostingClassifier <: MMI.Probabilistic
loss::String = "deviance"::(_ in ("deviance","exponential"))
# TODO: Remove "deviance" when python sklearn releases v1.3.0
loss::String = "log_loss"::(_ in ("deviance", "log_loss","exponential"))
learning_rate::Float64 = 0.1::(_>0)
n_estimators::Int = 100::(_>0)
subsample::Float64 = 1.0::(_>0)
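The classifier swaps the deprecated "deviance" for "log_loss" in the same way, keeping "deviance" in the validation list until sklearn 1.3 drops it. A sketch:

    using MLJ
    GBC = @load GradientBoostingClassifier pkg=MLJScikitLearnInterface
    model = GBC()   # loss = "log_loss"; the old "deviance" still validates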
@@ -155,6 +156,7 @@ MMI.fitted_params(m::GradientBoostingClassifier, (f, _, _)) = (
n_estimators = f.n_estimators_,
feature_importances = f.feature_importances_,
train_score = f.train_score_,
## TODO: Remove the `loss_` attribute when python sklearn releases v1.3
loss = f.loss_,
init = f.init_,
estimators = f.estimators_,
@@ -170,12 +172,13 @@ meta(GradientBoostingClassifier,
const RandomForestRegressor_ = sken(:RandomForestRegressor)
@sk_reg mutable struct RandomForestRegressor <: MMI.Deterministic
n_estimators::Int = 100::(_ > 0)
criterion::String = "mse"::(_ in ("mae", "mse"))
criterion::String = "squared_error"::(_ in ("squared_error","absolute_error", "friedman_mse", "poisson"))
max_depth::Option{Int} = nothing::(_ === nothing || _ > 0)
min_samples_split::Union{Int,Float64} = 2::(_ > 0)
min_samples_leaf::Union{Int,Float64} = 1::(_ > 0)
min_weight_fraction_leaf::Float64 = 0.0::(_ ≥ 0)
max_features::Union{Int,Float64,String,Nothing} = "auto"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
## TODO: Remove the "auto" option in python sklearn v1.3
max_features::Union{Int,Float64,String,Nothing} = "sqrt"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
max_leaf_nodes::Option{Int} = nothing::(_ === nothing || _ > 0)
min_impurity_decrease::Float64 = 0.0::(_ ≥ 0)
bootstrap::Bool = true
@@ -191,7 +194,7 @@ end
MMI.fitted_params(model::RandomForestRegressor, (f, _, _)) = (
estimators = f.estimators_,
feature_importances = f.feature_importances_,
n_features = f.n_features_,
n_features = f.n_features_in_,
n_outputs = f.n_outputs_,
oob_score = model.oob_score ? f.oob_score_ : nothing,
oob_prediction = model.oob_score ? f.oob_prediction_ : nothing
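Downstream code keeps reading `n_features` from `fitted_params`; only the underlying sklearn attribute changed to `n_features_in_`. A sketch using MLJ's standard accessors on synthetic data:

    using MLJ
    Forest = @load RandomForestRegressor pkg=MLJScikitLearnInterface
    X, y = make_regression(100, 4)           # 100 rows, 4 features
    mach = machine(Forest(), X, y) |> fit!
    fitted_params(mach).n_features           # == 4, via sklearn's n_features_in_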
@@ -206,12 +209,13 @@ meta(RandomForestRegressor,
const RandomForestClassifier_ = sken(:RandomForestClassifier)
@sk_clf mutable struct RandomForestClassifier <: MMI.Probabilistic
n_estimators::Int = 100::(_ > 0)
criterion::String = "gini"::(_ in ("gini","entropy"))
criterion::String = "gini"::(_ in ("gini","entropy", "log_loss"))
max_depth::Option{Int} = nothing::(_ === nothing || _ > 0)
min_samples_split::Union{Int,Float64} = 2::(_ > 0)
min_samples_leaf::Union{Int,Float64} = 1::(_ > 0)
min_weight_fraction_leaf::Float64 = 0.0::(_ ≥ 0)
max_features::Union{Int,Float64,String,Nothing} = "auto"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
## TODO: Remove the "auto" option in python sklearn v1.3
max_features::Union{Int,Float64,String,Nothing} = "sqrt"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
max_leaf_nodes::Option{Int} = nothing::(_ === nothing || _ > 0)
min_impurity_decrease::Float64 = 0.0::(_ ≥ 0)
bootstrap::Bool = true
@@ -229,7 +233,7 @@ MMI.fitted_params(m::RandomForestClassifier, (f, _, _)) = (
estimators = f.estimators_,
classes = f.classes_,
n_classes = f.n_classes_,
n_features = f.n_features_,
n_features = f.n_features_in_,
n_outputs = f.n_outputs_,
feature_importances = f.feature_importances_,
oob_score = m.oob_score ? f.oob_score_ : nothing,
@@ -250,12 +254,13 @@ MMI.target_scitype(::ENSEMBLE_REG) = AbstractVector{Continuous}
const ExtraTreesRegressor_ = sken(:ExtraTreesRegressor)
@sk_reg mutable struct ExtraTreesRegressor <: MMI.Deterministic
n_estimators::Int = 100::(_>0)
criterion::String = "mse"::(_ in ("mae", "mse"))
criterion::String = "squared_error"::(_ in ("squared_error","absolute_error", "friedman_mse", "poisson"))
max_depth::Option{Int} = nothing::(_ === nothing || _ > 0)
min_samples_split::Union{Int,Float64} = 2::(_ > 0)
min_samples_leaf::Union{Int,Float64} = 1::(_ > 0)
min_weight_fraction_leaf::Float64 = 0.0::(_ ≥ 0)
max_features::Union{Int,Float64,String,Nothing} = "auto"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
## TODO: Remove the "auto" option in python sklearn v1.3
max_features::Union{Int,Float64,String,Nothing} = "sqrt"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
max_leaf_nodes::Option{Int} = nothing::(_ === nothing || _ > 0)
min_impurity_decrease::Float64 = 0.0::(_ ≥ 0)
bootstrap::Bool = true
@@ -268,7 +273,7 @@ end
MMI.fitted_params(m::ExtraTreesRegressor, (f, _, _)) = (
estimators = f.estimators_,
feature_importances = f.feature_importances_,
n_features = f.n_features_,
n_features = f.n_features_in_,
n_outputs = f.n_outputs_,
oob_score = m.oob_score ? f.oob_score_ : nothing,
oob_prediction = m.oob_score ? f.oob_prediction_ : nothing,
@@ -293,12 +298,13 @@ ExtraTreesRegressor
const ExtraTreesClassifier_ = sken(:ExtraTreesClassifier)
@sk_clf mutable struct ExtraTreesClassifier <: MMI.Probabilistic
n_estimators::Int = 100::(_>0)
criterion::String = "gini"::(_ in ("gini", "entropy"))
criterion::String = "gini"::(_ in ("gini", "entropy", "log_loss"))
max_depth::Option{Int} = nothing::(_ === nothing || _ > 0)
min_samples_split::Union{Int,Float64} = 2::(_ > 0)
min_samples_leaf::Union{Int,Float64} = 1::(_ > 0)
min_weight_fraction_leaf::Float64 = 0.0::(_ ≥ 0)
max_features::Union{Int,Float64,String,Nothing} = "auto"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
## TODO: Remove the "auto" option in python sklearn v1.3
max_features::Union{Int,Float64,String,Nothing} = "sqrt"::(_ === nothing || (isa(_, String) && (_ in ("auto","sqrt","log2"))) || _ > 0)
max_leaf_nodes::Option{Int} = nothing::(_ === nothing || _ > 0)
min_impurity_decrease::Float64 = 0.0::(_ ≥ 0)
bootstrap::Bool = true
@@ -314,7 +320,7 @@ MMI.fitted_params(m::ExtraTreesClassifier, (f, _, _)) = (
classes = f.classes_,
n_classes = f.n_classes_,
feature_importances = f.feature_importances_,
n_features = f.n_features_,
n_features = f.n_features_in_,
n_outputs = f.n_outputs_,
oob_score = m.oob_score ? f.oob_score_ : nothing,
oob_decision_function = m.oob_score ? f.oob_decision_function_ : nothing,
8 changes: 4 additions & 4 deletions src/models/linear-classifiers.jl
@@ -131,7 +131,6 @@ const RidgeClassifier_ = sklm(:RidgeClassifier)
@sk_clf mutable struct RidgeClassifier <: MMI.Deterministic
alpha::Float64 = 1.0
fit_intercept::Bool = true
normalize::Bool = false
copy_X::Bool = true
max_iter::Option{Int} = nothing::(_ === nothing || _ > 0)
tol::Float64 = 1e-3::(arg>0)
@@ -155,7 +154,6 @@ const RidgeCVClassifier_ = sklm(:RidgeClassifierCV)
@sk_clf mutable struct RidgeCVClassifier <: MMI.Deterministic
alphas::AbstractArray{Float64} = [0.1,1.0,10.0]::(all(0 .≤ _))
fit_intercept::Bool = true
normalize::Bool = false
scoring::Any = nothing
cv::Int = 5
class_weight::Any = nothing
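With the upstream `normalize` keyword gone, scaling is the caller's job; a hedged sketch using MLJ's built-in Standardizer (note sklearn's `normalize` divided by the l2-norm, so results are not identical):

    using MLJ
    Ridge = @load RidgeClassifier pkg=MLJScikitLearnInterface
    pipe = Standardizer() |> Ridge(alpha=1.0)   # standardize features, then fit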
@@ -175,7 +173,8 @@ meta(RidgeCVClassifier,
# ============================================================================
const SGDClassifier_ = sklm(:SGDClassifier)
@sk_clf mutable struct SGDClassifier <: MMI.Deterministic
loss::String = "hinge"::(_ in ("hinge", "log", "modified_huber", "squared_hinge", "perceptron", "squared_loss", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"))
## TODO: remove the `log` option when python releases sklearn v1.3.
loss::String = "hinge"::(_ in ("hinge", "log_loss", "log", "modified_huber", "squared_hinge", "perceptron", "squared_error", "huber", "epsilon_insensitive", "squared_epsilon_insensitive"))
penalty::String = "l2"::(_ in ("l1", "l2", "elasticnet", "none"))
alpha::Float64 = 1e-4::(_ > 0)
l1_ratio::Float64 = 0.15::(0 ≤ _ ≤ 1)
Expand All @@ -199,7 +198,8 @@ const SGDClassifier_ = sklm(:SGDClassifier)
end
const ProbabilisticSGDClassifier_ = sklm(:SGDClassifier)
@sk_clf mutable struct ProbabilisticSGDClassifier <: MMI.Probabilistic
loss::String = "log"::(_ in ("log", "modified_huber")) # only those -> predict proba
## TODO: remove the `log` option when python releases sklearn v1.3.
loss::String = "log_loss"::(_ in ("log_loss", "log", "modified_huber")) # only those -> predict proba
penalty::String = "l2"::(_ in ("l1", "l2", "elasticnet", "none"))
alpha::Float64 = 1e-4::(_ > 0)
l1_ratio::Float64 = 0.15::(0 ≤ _ ≤ 1)
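Until the "log" alias is dropped, both spellings validate, but new code should prefer "log_loss". A sketch; only the log-loss and modified-Huber losses yield predicted probabilities, hence the Probabilistic subtype:

    using MLJ
    SGD = @load ProbabilisticSGDClassifier pkg=MLJScikitLearnInterface
    model = SGD(loss="log_loss")   # "log" still accepted until sklearn 1.3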
4 changes: 0 additions & 4 deletions src/models/linear-regressors-multi.jl
@@ -2,7 +2,6 @@ const MultiTaskLassoRegressor_ = sklm(:MultiTaskLasso)
@sk_reg mutable struct MultiTaskLassoRegressor <: MMI.Deterministic
alpha::Float64 = 1.0::(_ ≥ 0)
fit_intercept::Bool = true
normalize::Bool = false
max_iter::Int = 1_000::(_ > 0)
tol::Float64 = 1e-4::(_ > 0)
copy_X::Bool = true
@@ -22,7 +21,6 @@ const MultiTaskLassoCVRegressor_ = sklm(:MultiTaskLassoCV)
n_alphas::Int = 100::(_ > 0)
alphas::Any = nothing::(_ === nothing || all(0 .≤ _ .≤ 1))
fit_intercept::Bool = true
normalize::Bool = false
max_iter::Int = 300::(_ > 0)
tol::Float64 = 1e-4::(_ > 0)
copy_X::Bool = true
@@ -47,7 +45,6 @@ const MultiTaskElasticNetRegressor_ = sklm(:MultiTaskElasticNet)
alpha::Float64 = 1.0::(_ ≥ 0)
l1_ratio::Union{Float64, Vector{Float64}} = 0.5::(0 ≤ _ ≤ 1)
fit_intercept::Bool = true
normalize::Bool = true
copy_X::Bool = true
max_iter::Int = 1_000::(_ > 0)
tol::Float64 = 1e-4::(_ > 0)
@@ -69,7 +66,6 @@ const MultiTaskElasticNetCVRegressor_ = sklm(:MultiTaskElasticNetCV)
n_alphas::Int = 100::(_ > 0)
alphas::Any = nothing::(_ === nothing || all(0 .≤ _ .≤ 1))
fit_intercept::Bool = true
normalize::Bool = false
max_iter::Int = 1_000::(_ > 0)
tol::Float64 = 1e-4::(_ > 0)
cv::Any = 5
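The multi-task regressors lose their `normalize` field for the same upstream reason; the same Standardizer-pipeline pattern applies. A sketch:

    using MLJ
    MTLasso = @load MultiTaskLassoRegressor pkg=MLJScikitLearnInterface
    pipe = Standardizer() |> MTLasso()   # explicit scaling replaces normalize=true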