diff --git a/.gitignore b/.gitignore index 680f24775..b3360eb7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.json +*.cov Manifest.toml coverage/ diff --git a/Project.toml b/Project.toml index bc0732b81..3f0a9d45f 100644 --- a/Project.toml +++ b/Project.toml @@ -5,6 +5,7 @@ version = "0.2.4" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" @@ -13,7 +14,7 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" [compat] -Compat = "2.2, 3.2" +Compat = "2.2, 3" Distances = "0.8" Requires = "1.0.1" SpecialFunctions = "0.8, 0.9, 0.10" diff --git a/README.md b/README.md index 65dba72b0..b4597248b 100644 --- a/README.md +++ b/README.md @@ -13,19 +13,19 @@ The aim is to make the API as model-agnostic as possible while still being user- ```julia X = reshape(collect(range(-3.0,3.0,length=100)),:,1) # Set simple scaling of the data - k₁ = SqExponentialKernel(1.0) + k₁ = SqExponentialKernel() K₁ = kernelmatrix(k₁,X,obsdim=1) # Set a function transformation on the data - k₂ = MaternKernel(FunctionTransform(x->sin.(x))) + k₂ = TransformedKernel(Matern32Kernel(),FunctionTransform(x->sin.(x))) K₂ = kernelmatrix(k₂,X,obsdim=1) # Set a matrix premultiplication on the data - k₃ = PolynomialKernel(LowRankTransform(randn(4,1)),2.0,0.0) + k₃ = transform(PolynomialKernel(c=2.0,d=2.0),LowRankTransform(randn(4,1))) K₃ = kernelmatrix(k₃,X,obsdim=1) # Add and sum kernels - k₄ = 0.5*SqExponentialKernel()*LinearKernel(0.5) + 0.4*k₂ + k₄ = 0.5*SqExponentialKernel()*LinearKernel(c=0.5) + 0.4*k₂ K₄ = kernelmatrix(k₄,X,obsdim=1) plot(heatmap.([K₁,K₂,K₃,K₄],yflip=true,colorbar=false)...,layout=(2,2),title=["K₁" "K₂" "K₃" "K₄"]) diff --git a/docs/src/api.md 
b/docs/src/api.md index 425e50de5..830964516 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -14,7 +14,7 @@ CurrentModule = KernelFunctions KernelFunctions ``` -## Kernel Functions +## Base Kernels ```@docs SqExponentialKernel @@ -33,9 +33,11 @@ ConstantKernel WhiteKernel ``` -## Kernel Combinations +## Composite Kernels ```@docs +TransformedKernel +ScaledKernel KernelSum KernelProduct ``` diff --git a/docs/src/kernels.md b/docs/src/kernels.md index 8d5173526..a80b1b900 100644 --- a/docs/src/kernels.md +++ b/docs/src/kernels.md @@ -2,6 +2,10 @@ CurrentModule = KernelFunctions ``` +# Base Kernels + +These are the basic kernels without any transformation of the data. They are the building blocks of KernelFunctions. + ## Exponential Kernels ### Exponential Kernel @@ -13,7 +17,7 @@ The [Exponential Kernel](@ref ExponentialKernel) is defined as ### Square Exponential Kernel -The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is defined as +The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is defined as ```math k(x,x') = \exp\left(-\|x-x'\|^2\right) ``` @@ -91,3 +95,13 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def ```math k(x,x') = 0 ``` + +# Composite Kernels + +## TransformedKernel + +## ScaledKernel + +## KernelSum + +## KernelProduct diff --git a/docs/src/metrics.md b/docs/src/metrics.md index 905237cac..b93779daf 100644 --- a/docs/src/metrics.md +++ b/docs/src/metrics.md @@ -2,7 +2,15 @@ KernelFunctions.jl relies on [Distances.jl](https://github.com/JuliaStats/Distances.jl) for computing the pairwise matrix. To do so a distance measure is needed for each kernel. Two very common ones can already be used : `SqEuclidean` and `Euclidean`. -However all kernels do not rely on distances metrics respecting all the definitions. That's why two additional metrics come with the package : `DotProduct` (`<x,y>`) and `Delta` (`δ(x,y)`). 
If you want to create a new distance just implement the following : +However, not all kernels rely on distance measures that satisfy every axiom of a metric. That's why two additional metrics come with the package: `DotProduct` (`<x,y>`) and `Delta` (`δ(x,y)`). +Note that every base kernel must define its metric as follows: +```julia + metric(::CustomKernel) = SqEuclidean() +``` + +## Adding a new metric + +If you want to create a new distance, just implement the following: ```julia struct Delta <: Distances.PreMetric diff --git a/docs/src/transform.md b/docs/src/transform.md index 8b4d5c595..31744434a 100644 --- a/docs/src/transform.md +++ b/docs/src/transform.md @@ -1,10 +1,10 @@ # Transform -`Transform` is the object that takes care of transforming the input data before distances are being computed. It can be as standard as `IdentityTransform` returning the same input, can be a scalar with `ScaleTransform` multiplying the vectors by a scalar or a vector. +`Transform` is the object that takes care of transforming the input data before distances are computed. It can be as simple as `IdentityTransform`, which returns its input unchanged, or it can multiply the data by a scalar with `ScaleTransform` or by a vector with `ARDTransform`. There is a more general `Transform`: `FunctionTransform` that uses a function and apply it on each vector via `mapslices`. You can also create a pipeline of `Transform` via `TransformChain`. For example `LowRankTransform(rand(10,5))∘ScaleTransform(2.0)`. 
-One apply a transformation on a matrix or a vector via `transform(t::Transform,v::AbstractVecOrMat)` +A transformation is applied to a matrix or a vector via `KernelFunctions.apply(t::Transform,v::AbstractVecOrMat)` ## Transforms : ```@meta @@ -14,6 +14,7 @@ CurrentModule = KernelFunctions ```@docs IdentityTransform ScaleTransform + ARDTransform LowRankTransform FunctionTransform ChainTransform diff --git a/src/KernelFunctions.jl b/src/KernelFunctions.jl index 39d28e87c..000c99034 100644 --- a/src/KernelFunctions.jl +++ b/src/KernelFunctions.jl @@ -1,6 +1,7 @@ module KernelFunctions -export kernelmatrix, kernelmatrix!, kerneldiagmatrix, kerneldiagmatrix!, kappa # Main matrix functions +export kernelmatrix, kernelmatrix!, kerneldiagmatrix, kerneldiagmatrix!, kappa +export transform export params, duplicate, set! # Helpers export Kernel @@ -11,6 +12,7 @@ export MaternKernel, Matern32Kernel, Matern52Kernel export LinearKernel, PolynomialKernel export RationalQuadraticKernel, GammaRationalQuadraticKernel export KernelSum, KernelProduct +export TransformedKernel, ScaledKernel export Transform, SelectTransform, ChainTransform, ScaleTransform, LowRankTransform, IdentityTransform, FunctionTransform @@ -22,6 +24,7 @@ using Distances, LinearAlgebra using SpecialFunctions: logabsgamma, besselk using ZygoteRules: @adjoint using StatsFuns: logtwo +using InteractiveUtils: subtypes using StatsBase const defaultobs = 2 @@ -30,7 +33,8 @@ const defaultobs = 2 Abstract type defining a slice-wise transformation on an input matrix """ abstract type Transform end -abstract type Kernel{Tr<:Transform} end +abstract type Kernel end +abstract type BaseKernel <: Kernel end include("utils.jl") include("distances/dotproduct.jl") @@ -40,6 +44,8 @@ include("transform/transform.jl") for k in ["exponential","matern","polynomial","constant","rationalquad","exponentiated"] include(joinpath("kernels",k*".jl")) end +include("kernels/transformedkernel.jl") +include("kernels/scaledkernel.jl") 
include("matrix/kernelmatrix.jl") include("kernels/kernelsum.jl") include("kernels/kernelproduct.jl") diff --git a/src/generic.jl b/src/generic.jl index 4154cd46c..da2e0ad03 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -1,33 +1,32 @@ -@inline metric(κ::Kernel) = κ.metric - ## Allows to iterate over kernels Base.length(::Kernel) = 1 Base.iterate(k::Kernel) = (k,nothing) Base.iterate(k::Kernel, ::Any) = nothing # default fallback for evaluating a kernel with two arguments (such as vectors etc) -kappa(κ::Kernel, x, y) = kappa(κ, evaluate(metric(κ), transform(κ, x), transform(κ, y))) +kappa(κ::Kernel, x, y) = kappa(κ, evaluate(metric(κ), x, y)) +kappa(κ::TransformedKernel, x, y) = kappa(kernel(κ), apply(κ.transform,x), apply(κ.transform,y)) +kappa(κ::TransformedKernel{<:BaseKernel,<:ScaleTransform}, x, y) = kappa(κ, _scale(κ.transform, metric(κ), x, y)) +_scale(t::ScaleTransform, metric::Euclidean, x, y) = first(t.s) * evaluate(metric, x, y) +_scale(t::ScaleTransform, metric::Union{SqEuclidean,DotProduct}, x, y) = first(t.s)^2 * evaluate(metric, x, y) +_scale(t::ScaleTransform, metric, x, y) = evaluate(metric, apply(t, x), apply(t, y)) + +printshifted(io::IO,κ::Kernel,shift::Int) = print(io,"$κ") +Base.show(io::IO,κ::Kernel) = print(io,nameof(typeof(κ))) ### Syntactic sugar for creating matrices and using kernel functions -for k in [:ExponentialKernel,:SqExponentialKernel,:GammaExponentialKernel,:MaternKernel,:Matern32Kernel,:Matern52Kernel,:LinearKernel,:PolynomialKernel,:ExponentiatedKernel,:ZeroKernel,:WhiteKernel,:ConstantKernel,:RationalQuadraticKernel,:GammaRationalQuadraticKernel] +for k in subtypes(BaseKernel) @eval begin @inline (κ::$k)(d::Real) = kappa(κ,d) #TODO Add test @inline (κ::$k)(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) = kappa(κ, x, y) - @inline (κ::$k)(X::AbstractMatrix{T},Y::AbstractMatrix{T};obsdim::Integer=defaultobs) where {T} = kernelmatrix(κ,X,Y,obsdim=obsdim) - @inline 
(κ::$k)(X::AbstractMatrix{T};obsdim::Integer=defaultobs) where {T} = kernelmatrix(κ,X,obsdim=obsdim) + @inline (κ::$k)(X::AbstractMatrix{T}, Y::AbstractMatrix{T}; obsdim::Integer=defaultobs) where {T} = kernelmatrix(κ, X, Y, obsdim=obsdim) + @inline (κ::$k)(X::AbstractMatrix{T}; obsdim::Integer=defaultobs) where {T} = kernelmatrix(κ, X, obsdim=obsdim) end end -### Transform generics -@inline transform(κ::Kernel) = κ.transform -@inline transform(κ::Kernel, x) = transform(transform(κ), x) -@inline transform(κ::Kernel, x, obsdim::Int) = transform(transform(κ), x, obsdim) - -## Constructors for kernels without parameters -for kernel in [:ExponentialKernel,:SqExponentialKernel,:Matern32Kernel,:Matern52Kernel,:ExponentiatedKernel] +for k in nameof.(subtypes(BaseKernel)) @eval begin - $kernel() = $kernel(IdentityTransform()) - $kernel(ρ::Real) = $kernel(ScaleTransform(ρ)) - $kernel(ρ::AbstractVector{<:Real}) = $kernel(ARDTransform(ρ)) + @deprecate($k(ρ::Real;args...),transform($k(;args...),ρ)) + @deprecate($k(ρ::AbstractVector{<:Real};args...),transform($k(;args...),ρ)) end end diff --git a/src/kernels/constant.jl b/src/kernels/constant.jl index 947de0153..402f6c06e 100644 --- a/src/kernels/constant.jl +++ b/src/kernels/constant.jl @@ -1,55 +1,49 @@ """ -ZeroKernel([tr=IdentityTransform()]) +ZeroKernel() -Create a kernel always returning zero +Create a kernel that always returns zero +``` + κ(x,y) = 0.0 +``` +The output type depends on `x` and `y` """ -struct ZeroKernel{Tr} <: Kernel{Tr} - transform::Tr -end - -ZeroKernel() = ZeroKernel(IdentityTransform()) +struct ZeroKernel <: BaseKernel end -@inline kappa(κ::ZeroKernel, d::T) where {T<:Real} = zero(T) +kappa(κ::ZeroKernel, d::T) where {T<:Real} = zero(T) metric(::ZeroKernel) = Delta() """ -`WhiteKernel([tr=IdentityTransform()])` +`WhiteKernel()` ``` κ(x,y) = δ(x,y) ``` Kernel function working as an equivalent to add white noise. 
""" -struct WhiteKernel{Tr} <: Kernel{Tr} - transform::Tr -end - -WhiteKernel() = WhiteKernel(IdentityTransform()) +struct WhiteKernel <: BaseKernel end -@inline kappa(κ::WhiteKernel,δₓₓ::Real) = δₓₓ +kappa(κ::WhiteKernel,δₓₓ::Real) = δₓₓ metric(::WhiteKernel) = Delta() """ -`ConstantKernel([tr=IdentityTransform(),[c=1.0]])` +`ConstantKernel(c=1.0)` ``` κ(x,y) = c ``` Kernel function always returning a constant value `c` """ -struct ConstantKernel{Tr, Tc<:Real} <: Kernel{Tr} - transform::Tr +struct ConstantKernel{Tc<:Real} <: BaseKernel c::Tc + function ConstantKernel(;c::T=1.0) where {T<:Real} + new{T}(c) + end end -params(k::ConstantKernel) = (params(k.transform),k.c) -opt_params(k::ConstantKernel) = (opt_params(k.transform),k.c) - -ConstantKernel(c::Real=1.0) = ConstantKernel(IdentityTransform(),c) - -ConstantKernel(t::Tr,c::Tc=1.0) where {Tr<:Transform,Tc<:Real} = ConstantKernel{Tr,Tc}(t,c) +params(k::ConstantKernel) = (k.c,) +opt_params(k::ConstantKernel) = (k.c,) -@inline kappa(κ::ConstantKernel,x::Real) = κ.c +kappa(κ::ConstantKernel,x::Real) = κ.c*one(x) metric(::ConstantKernel) = Delta() diff --git a/src/kernels/exponential.jl b/src/kernels/exponential.jl index 0302128c1..7d903109e 100644 --- a/src/kernels/exponential.jl +++ b/src/kernels/exponential.jl @@ -1,22 +1,22 @@ """ -`SqExponentialKernel([ρ=1.0])` +`SqExponentialKernel()` The squared exponential kernel is an isotropic Mercer kernel given by the formula: ``` - κ(x,y) = exp(-ρ²‖x-y‖²) + κ(x,y) = exp(-‖x-y‖²) ``` See also [`ExponentialKernel`](@ref) for a related form of the kernel or [`GammaExponentialKernel`](@ref) for a generalization. 
""" -struct SqExponentialKernel{Tr} <: Kernel{Tr} - transform::Tr -end +struct SqExponentialKernel <: BaseKernel end -@inline kappa(κ::SqExponentialKernel, d²::Real) = exp(-d²) -@inline iskroncompatible(::SqExponentialKernel) = true +kappa(κ::SqExponentialKernel, d²::Real) = exp(-d²) +iskroncompatible(::SqExponentialKernel) = true metric(::SqExponentialKernel) = SqEuclidean() +Base.show(io::IO,::SqExponentialKernel) = print(io,"Squared Exponential Kernel") + ## Aliases ## const RBFKernel = SqExponentialKernel const GaussianKernel = SqExponentialKernel @@ -28,14 +28,14 @@ The exponential kernel is an isotropic Mercer kernel given by the formula: κ(x,y) = exp(-ρ‖x-y‖) ``` """ -struct ExponentialKernel{Tr} <: Kernel{Tr} - transform::Tr -end +struct ExponentialKernel <: BaseKernel end -@inline kappa(κ::ExponentialKernel, d::Real) = exp(-d) -@inline iskroncompatible(::ExponentialKernel) = true +kappa(κ::ExponentialKernel, d::Real) = exp(-d) +iskroncompatible(::ExponentialKernel) = true metric(::ExponentialKernel) = Euclidean() +Base.show(io::IO,::ExponentialKernel) = print(io,"Exponential Kernel") + ## Alias ## const LaplacianKernel = ExponentialKernel @@ -46,30 +46,17 @@ The γ-exponential kernel is an isotropic Mercer kernel given by the formula: κ(x,y) = exp(-ρ^(2γ)‖x-y‖^(2γ)) ``` """ -struct GammaExponentialKernel{Tr, Tγ<:Real} <: Kernel{Tr} - transform::Tr +struct GammaExponentialKernel{Tγ<:Real} <: BaseKernel γ::Tγ - function GammaExponentialKernel{Tr,Tγ}(t::Tr, γ::Tγ) where {Tr<:Transform,Tγ<:Real} - @check_args(GammaExponentialKernel, γ, γ >= zero(Tγ), "γ > 0") - return new{Tr, Tγ}(t, γ) + function GammaExponentialKernel(;γ::T=2.0) where {T<:Real} + @check_args(GammaExponentialKernel, γ, γ >= zero(T), "γ > 0") + return new{T}(γ) end end -params(k::GammaExponentialKernel) = (params(transform),γ) -opt_params(k::GammaExponentialKernel) = (opt_params(transform),γ) - -function GammaExponentialKernel(ρ::Real=1.0, γ::Real=2.0) - GammaExponentialKernel(ScaleTransform(ρ), 
γ) -end - -function GammaExponentialKernel(ρ::AbstractVector{<:Real}, γ::Real=2.0) - GammaExponentialKernel(ARDTransform(ρ), γ) -end - -function GammaExponentialKernel(t::Tr, γ::Tγ=2.0) where {Tr<:Transform, Tγ<:Real} - GammaExponentialKernel{Tr, Tγ}(t, γ) -end +params(k::GammaExponentialKernel) = (k.γ,) +opt_params(k::GammaExponentialKernel) = (k.γ,) -@inline kappa(κ::GammaExponentialKernel, d²::Real) = exp(-d²^κ.γ) -@inline iskroncompatible(::GammaExponentialKernel) = true +kappa(κ::GammaExponentialKernel, d²::Real) = exp(-d²^κ.γ) +iskroncompatible(::GammaExponentialKernel) = true metric(::GammaExponentialKernel) = SqEuclidean() diff --git a/src/kernels/exponentiated.jl b/src/kernels/exponentiated.jl index 5e19ef990..ef177cb40 100644 --- a/src/kernels/exponentiated.jl +++ b/src/kernels/exponentiated.jl @@ -5,9 +5,10 @@ The exponentiated kernel is a Mercer kernel given by: κ(x,y) = exp(ρ²xᵀy) ``` """ -struct ExponentiatedKernel{Tr} <: Kernel{Tr} - transform::Tr -end -@inline kappa(κ::ExponentiatedKernel, xᵀy::T) where {T<:Real} = exp(xᵀy) +struct ExponentiatedKernel <: BaseKernel end + +kappa(κ::ExponentiatedKernel, xᵀy::Real) = exp(xᵀy) metric(::ExponentiatedKernel) = DotProduct() + +iskroncompatible(::ExponentiatedKernel) = true diff --git a/src/kernels/kernelproduct.jl b/src/kernels/kernelproduct.jl index 3c39584ef..0480dd430 100644 --- a/src/kernels/kernelproduct.jl +++ b/src/kernels/kernelproduct.jl @@ -10,15 +10,12 @@ kernelmatrix(k,X) == kernelmatrix(k1,X).*kernelmatrix(k2,X) kernelmatrix(k,X) == kernelmatrix(k1*k2,X) ``` """ -struct KernelProduct{Tr} <: Kernel{Tr} +struct KernelProduct <: Kernel kernels::Vector{Kernel} end -KernelProduct(kernels::AbstractVector{<:Kernel}) = KernelProduct{Transform}(kernels) - params(k::KernelProduct) = params.(k.kernels) opt_params(k::KernelProduct) = opt_params.(k.kernels) -duplicate(k::KernelProduct,θ) = KernelProduct(duplicate.(k.kernels,θ)) Base.:*(k1::Kernel,k2::Kernel) = KernelProduct([k1,k2]) 
Base.:*(k1::KernelProduct,k2::KernelProduct) = KernelProduct(vcat(k1.kernels,k2.kernels)) #TODO Add test @@ -26,10 +23,8 @@ Base.:*(k::Kernel,kp::KernelProduct) = KernelProduct(vcat(k,kp.kernels)) Base.:*(kp::KernelProduct,k::Kernel) = KernelProduct(vcat(kp.kernels,k)) Base.length(k::KernelProduct) = length(k.kernels) -metric(k::KernelProduct) = metric.(k.kernels) #TODO Add test -transform(k::KernelProduct) = transform.(k.kernels) #TODO Add test -transform(k::KernelProduct,x::AbstractVecOrMat) = transform.(k.kernels,[x]) #TODO Add test -transform(k::KernelProduct,x::AbstractVecOrMat,obsdim::Int) = transform.(k.kernels,[x],obsdim) #TODO Add test + +kappa(κ::KernelProduct, x ,y) = prod(kappa(k, x, y) for k in κ.kernels) hadamard(x,y) = x.*y @@ -54,3 +49,15 @@ function kerneldiagmatrix( obsdim::Int=defaultobs) #TODO Add test reduce(hadamard,kerneldiagmatrix(κ.kernels[i],X,obsdim=obsdim) for i in 1:length(κ)) end + +function Base.show(io::IO,κ::KernelProduct) + printshifted(io,κ,0) +end + +function printshifted(io::IO,κ::KernelProduct, shift::Int) + print(io,"Product of $(length(κ)) kernels:") + for i in 1:length(κ) + print(io,"\n"*("\t"^(shift+1))*"- ") + printshifted(io,κ.kernels[i],shift+2) + end +end diff --git a/src/kernels/kernelsum.jl b/src/kernels/kernelsum.jl index 7af88658b..8e4402be0 100644 --- a/src/kernels/kernelsum.jl +++ b/src/kernels/kernelsum.jl @@ -11,56 +11,74 @@ kernelmatrix(k,X) == kernelmatrix(k1+k2,X) kweighted = 0.5*k1 + 2.0*k2 ``` """ -struct KernelSum{Tr} <: Kernel{Tr} +struct KernelSum <: Kernel kernels::Vector{Kernel} weights::Vector{Real} - function KernelSum{Tr}(kernels::AbstractVector{<:Kernel},weights::AbstractVector{<:Real}) where {Tr} - new{Tr}(kernels,weights) - end end -function KernelSum(kernels::AbstractVector{<:Kernel}; weights::AbstractVector{<:Real}=ones(Float64,length(kernels))) - @assert length(kernels)==length(weights) "Weights and kernel vector should be of the same length" - @assert all(weights.>=0) "All weights should be 
positive" - KernelSum{Transform}(kernels,weights) +function KernelSum( + kernels::AbstractVector{<:Kernel}; + weights::AbstractVector{<:Real} = ones(Float64, length(kernels)), +) + @assert length(kernels) == length(weights) "Weights and kernel vector should be of the same length" + @assert all(weights .>= 0) "All weights should be positive" + KernelSum(kernels, weights) end -params(k::KernelSum) = (k.weights,params.(k.kernels)) -opt_params(k::KernelSum) = (k.weights,opt_params.(k.kernels)) -duplicate(k::KernelSum,θ) = KernelSum(duplicate.(k.kernels,θ[end]),weights=first(θ)) - -Base.:+(k1::Kernel,k2::Kernel) = KernelSum([k1,k2],weights=[1.0,1.0]) -Base.:+(k1::KernelSum,k2::KernelSum) = KernelSum(vcat(k1.kernels,k2.kernels),weights=vcat(k1.weights,k2.weights)) -Base.:+(k::Kernel,ks::KernelSum) = KernelSum(vcat(k,ks.kernels),weights=vcat(1.0,ks.weights)) -Base.:+(ks::KernelSum,k::Kernel) = KernelSum(vcat(ks.kernels,k),weights=vcat(ks.weights,1.0)) -Base.:*(w::Real,k::Kernel) = KernelSum([k],weights=[w]) #TODO add tests -Base.:*(w::Real,k::KernelSum) = KernelSum(k.kernels,weights=w*k.weights) #TODO add tests +params(k::KernelSum) = (k.weights, params.(k.kernels)) +opt_params(k::KernelSum) = (k.weights, opt_params.(k.kernels)) +Base.:+(k1::Kernel, k2::Kernel) = KernelSum([k1, k2], weights = [1.0, 1.0]) +Base.:+(k1::ScaledKernel, k2::ScaledKernel) = KernelSum([kernel(k1), kernel(k2)], weights = [first(k1.σ), first(k2.σ)]) +Base.:+(k1::KernelSum, k2::KernelSum) = + KernelSum(vcat(k1.kernels, k2.kernels), weights = vcat(k1.weights, k2.weights)) +Base.:+(k::Kernel, ks::KernelSum) = + KernelSum(vcat(k, ks.kernels), weights = vcat(1.0, ks.weights)) +Base.:+(k::ScaledKernel, ks::KernelSum) = + KernelSum(vcat(kernel(k), ks.kernels), weights = vcat(first(k.σ), ks.weights)) +Base.:+(k::ScaledKernel, ks::Kernel) = + KernelSum(vcat(kernel(k), ks), weights = vcat(first(k.σ), 1.0)) +Base.:+(ks::KernelSum, k::Kernel) = + KernelSum(vcat(ks.kernels, k), weights = vcat(ks.weights, 1.0)) 
+Base.:+(ks::KernelSum, k::ScaledKernel) = + KernelSum(vcat(ks.kernels, kernel(k)), weights = vcat(ks.weights, first(k.σ))) +Base.:+(ks::Kernel, k::ScaledKernel) = + KernelSum(vcat(ks, kernel(k)), weights = vcat(1.0, first(k.σ))) +Base.:*(w::Real, k::KernelSum) = KernelSum(k.kernels, weights = w * k.weights) #TODO add tests Base.length(k::KernelSum) = length(k.kernels) -metric(k::KernelSum) = metric.(k.kernels) -transform(k::KernelSum) = transform.(k.kernels) -transform(k::KernelSum,x::AbstractVecOrMat) = transform.(k.kernels,[x]) -transform(k::KernelSum,x::AbstractVecOrMat,obsdim::Int) = transform.(k.kernels,[x],obsdim) -function kernelmatrix( - κ::KernelSum, - X::AbstractMatrix; - obsdim::Int=defaultobs) - sum(κ.weights[i]*kernelmatrix(κ.kernels[i],X,obsdim=obsdim) for i in 1:length(κ)) +kappa(κ::KernelSum, x, y) = sum(κ.weights[i] * kappa(κ.kernels[i], x, y) for i in 1:length(κ)) + +function kernelmatrix(κ::KernelSum, X::AbstractMatrix; obsdim::Int = defaultobs) + sum(κ.weights[i] * kernelmatrix(κ.kernels[i], X, obsdim = obsdim) for i in 1:length(κ)) end function kernelmatrix( κ::KernelSum, X::AbstractMatrix, Y::AbstractMatrix; - obsdim::Int=defaultobs) - sum(κ.weights[i]*_kernelmatrix(κ.kernels[i],X,Y,obsdim) for i in 1:length(κ)) + obsdim::Int = defaultobs, +) + sum(κ.weights[i] * _kernelmatrix(κ.kernels[i], X, Y, obsdim) for i in 1:length(κ)) end function kerneldiagmatrix( κ::KernelSum, X::AbstractMatrix; - obsdim::Int=defaultobs) - sum(κ.weights[i]*kerneldiagmatrix(κ.kernels[i],X,obsdim=obsdim) for i in 1:length(κ)) + obsdim::Int = defaultobs, +) + sum(κ.weights[i] * kerneldiagmatrix(κ.kernels[i], X, obsdim = obsdim) for i in 1:length(κ)) +end + +function Base.show(io::IO,κ::KernelSum) + printshifted(io,κ,0) +end + +function printshifted(io::IO,κ::KernelSum, shift::Int) + print(io,"Sum of $(length(κ)) kernels:") + for i in 1:length(κ) + print(io,"\n"*("\t"^(shift+1))*"- (w=$(κ.weights[i])) ") + printshifted(io,κ.kernels[i],shift+2) + end end diff --git 
a/src/kernels/matern.jl b/src/kernels/matern.jl index b9bc22988..b818a4602 100644 --- a/src/kernels/matern.jl +++ b/src/kernels/matern.jl @@ -6,25 +6,18 @@ The matern kernel is an isotropic Mercer kernel given by the formula: ``` For `ν=n+1/2, n=0,1,2,...` it can be simplified and you should instead use [`ExponentialKernel`](@ref) for `n=0`, [`Matern32Kernel`](@ref), for `n=1`, [`Matern52Kernel`](@ref) for `n=2` and [`SqExponentialKernel`](@ref) for `n=∞`. """ -struct MaternKernel{Tr<:Transform, Tν<:Real} <: Kernel{Tr} - transform::Tr +struct MaternKernel{Tν<:Real} <: BaseKernel ν::Tν - function MaternKernel{Tr, Tν}(t::Tr, ν::Tν) where {Tr, Tν} - @check_args(MaternKernel, ν, ν > zero(Tν), "ν > 0") - return new{Tr, Tν}(t, ν) + function MaternKernel(;ν::T=1.5) where {T<:Real} + @check_args(MaternKernel, ν, ν > zero(T), "ν > 0") + return new{T}(ν) end end -MaternKernel(ρ::Real=1.0, ν::Real=1.5) = MaternKernel(ScaleTransform(ρ), ν) +params(k::MaternKernel) = (k.ν,) +opt_params(k::MaternKernel) = (k.ν,) -MaternKernel(ρ::AbstractVector{<:Real},ν::Real=1.5) = MaternKernel(ARDTransform(ρ), ν) - -MaternKernel(t::Tr, ν::T=1.5) where {Tr<:Transform, T<:Real} = MaternKernel{Tr, T}(t, ν) - -params(k::MaternKernel) = (params(transform(k)),k.ν) -opt_params(k::MaternKernel) = (opt_params(transform(k)),k.ν) - -@inline kappa(κ::MaternKernel, d::Real) = iszero(d) ? one(d) : exp((1.0-κ.ν)*logtwo-logabsgamma(κ.ν)[1] + κ.ν*log(sqrt(2κ.ν)*d)+log(besselk(κ.ν,sqrt(2κ.ν)*d))) +@inline kappa(κ::MaternKernel, d::Real) = iszero(d) ? 
one(d) : exp((one(d)-κ.ν)*logtwo-logabsgamma(κ.ν)[1] + κ.ν*log(sqrt(2κ.ν)*d)+log(besselk(κ.ν,sqrt(2κ.ν)*d))) metric(::MaternKernel) = Euclidean() @@ -35,11 +28,9 @@ The matern 3/2 kernel is an isotropic Mercer kernel given by the formula: κ(x,y) = (1+√(3)ρ‖x-y‖)exp(-√(3)ρ‖x-y‖) ``` """ -struct Matern32Kernel{Tr} <: Kernel{Tr} - transform::Tr -end +struct Matern32Kernel <: BaseKernel end -@inline kappa(κ::Matern32Kernel, d::Real) = (1+sqrt(3)*d)*exp(-sqrt(3)*d) +kappa(κ::Matern32Kernel, d::Real) = (1+sqrt(3)*d)*exp(-sqrt(3)*d) metric(::Matern32Kernel) = Euclidean() @@ -50,10 +41,8 @@ The matern 5/2 kernel is an isotropic Mercer kernel given by the formula: κ(x,y) = (1+√(5)ρ‖x-y‖ + 5ρ²‖x-y‖^2/3)exp(-√(5)ρ‖x-y‖) ``` """ -struct Matern52Kernel{Tr} <: Kernel{Tr} - transform::Tr -end +struct Matern52Kernel <: BaseKernel end -@inline kappa(κ::Matern52Kernel, d::Real) = (1+sqrt(5)*d+5*d^2/3)*exp(-sqrt(5)*d) +kappa(κ::Matern52Kernel, d::Real) = (1+sqrt(5)*d+5*d^2/3)*exp(-sqrt(5)*d) metric(::Matern52Kernel) = Euclidean() diff --git a/src/kernels/polynomial.jl b/src/kernels/polynomial.jl index 4c5c78feb..4e60ecdc1 100644 --- a/src/kernels/polynomial.jl +++ b/src/kernels/polynomial.jl @@ -6,25 +6,17 @@ The linear kernel is a Mercer kernel given by ``` Where `c` is a real number """ -struct LinearKernel{Tr, Tc<:Real} <: Kernel{Tr} - transform::Tr +struct LinearKernel{Tc<:Real} <: BaseKernel c::Tc + function LinearKernel(;c::T=0.0) where {T} + new{T}(c) + end end -function LinearKernel(ρ::T=1.0, c::Real=zero(T)) where {T<:Real} - LinearKernel(ScaleTransform(ρ), c) -end - -function LinearKernel(ρ::AbstractVector{T}, c::Real=zero(T)) where {T<:Real} - LinearKernel(ARDTransform(ρ), c) -end - -LinearKernel(t::Transform) = LinearKernel(t, 0.0) - -params(k::LinearKernel) = (params(transform(k)),k.c) -opt_params(k::LinearKernel) = (opt_params(transform(k)),k.c) +params(k::LinearKernel) = (k.c,) +opt_params(k::LinearKernel) = (k.c,) -@inline kappa(κ::LinearKernel, xᵀy::T) where 
{T<:Real} = xᵀy + κ.c +kappa(κ::LinearKernel, xᵀy::Real) = xᵀy + κ.c metric(::LinearKernel) = DotProduct() @@ -36,31 +28,18 @@ The polynomial kernel is a Mercer kernel given by ``` Where `c` is a real number, and `d` is a shape parameter bigger than 1 """ -struct PolynomialKernel{Tr,Tc<:Real,Td<:Real} <: Kernel{Tr} - transform::Tr +struct PolynomialKernel{Td<:Real,Tc<:Real} <: BaseKernel d::Td c::Tc - function PolynomialKernel{Tr, Tc, Td}(transform::Tr, d::Td, c::Tc) where {Tr<:Transform, Td<:Real, Tc<:Real} + function PolynomialKernel(; d::Td=2.0, c::Tc=0.0) where {Td<:Real, Tc<:Real} @check_args(PolynomialKernel, d, d >= one(Td), "d >= 1") - return new{Tr, Td, Tc}(transform,d, c) + return new{Td, Tc}(d, c) end end -function PolynomialKernel(ρ::Real=1.0, d::Td=2.0, c::Real=zero(Td)) where {Td<:Real} - PolynomialKernel(ScaleTransform(ρ), d, c) -end - -function PolynomialKernel(ρ::AbstractVector{T}, d::Real=2.0, c::Real=zero(T₁)) where {T<:Real} - PolynomialKernel(ARDTransform(ρ), d, c) -end - -function PolynomialKernel(t::Tr, d::Td=2.0, c::Tc=zero(eltype(Td))) where {Tr<:Transform, Td<:Real, Tc<:Real} - PolynomialKernel{Tr, Tc, Td}(t, d, c) -end - -params(k::PolynomialKernel) = (params(transform(k)),k.d,k.c) -opt_params(k::PolynomialKernel) = (opt_params(transform(k)),k.d,k.c) +params(k::PolynomialKernel) = (k.d,k.c) +opt_params(k::PolynomialKernel) = (k.d,k.c) -@inline kappa(κ::PolynomialKernel, xᵀy::T) where {T<:Real} = (xᵀy + κ.c)^(κ.d) +kappa(κ::PolynomialKernel, xᵀy::T) where {T<:Real} = (xᵀy + κ.c)^(κ.d) metric(::PolynomialKernel) = DotProduct() diff --git a/src/kernels/rationalquad.jl b/src/kernels/rationalquad.jl index d6e3c3821..76aa00493 100644 --- a/src/kernels/rationalquad.jl +++ b/src/kernels/rationalquad.jl @@ -2,36 +2,22 @@ RationalQuadraticKernel([ρ=1.0[,α=2.0]]) The rational-quadratic kernel is an isotropic Mercer kernel given by the formula: ``` - κ(x,y)=(1+ρ²||x−y||²/α)^(-α) + κ(x,y)=(1+||x−y||²/α)^(-α) ``` where `α` is a shape parameter of the 
Euclidean distance. Check [`GammaRationalQuadraticKernel`](@ref) for a generalization. """ -struct RationalQuadraticKernel{Tr,Tα<:Real} <: Kernel{Tr} - transform::Tr +struct RationalQuadraticKernel{Tα<:Real} <: BaseKernel α::Tα - function RationalQuadraticKernel{Tr, Tα}(t::Tr, α::Tα) where {Tr, Tα} - @check_args(RationalQuadraticKernel, α, α > zero(Tα), "α > 1") - return new{Tr, Tα}(t, α) + function RationalQuadraticKernel(;α::T=2.0) where {T<:Real} + @check_args(RationalQuadraticKernel, α, α > zero(T), "α > 0") + return new{T}(α) end end -function RationalQuadraticKernel(ρ::Real=1.0, α::Real=2.0) - RationalQuadraticKernel(ScaleTransform(ρ),α) -end - -function RationalQuadraticKernel(ρ::AbstractVector{<:Real}, α::Real=2.0) - RationalQuadraticKernel(ARDTransform(ρ), α) -end - -function RationalQuadraticKernel(t::Tr, α::Tα=2.0) where {Tr<:Transform, Tα<:Real} - return RationalQuadraticKernel{Tr, Tα}(t, α) -end - - -params(k::RationalQuadraticKernel) = (params(transform(k)),k.α) -opt_params(k::RationalQuadraticKernel) = (opt_params(transform(k)),k.α) +params(k::RationalQuadraticKernel) = (k.α,) +opt_params(k::RationalQuadraticKernel) = (k.α,) -@inline kappa(κ::RationalQuadraticKernel, d²::T) where {T<:Real} = (one(T)+d²/κ.α)^(-κ.α) +kappa(κ::RationalQuadraticKernel, d²::T) where {T<:Real} = (one(T)+d²/κ.α)^(-κ.α) metric(::RationalQuadraticKernel) = SqEuclidean() @@ -43,32 +29,19 @@ The Gamma-rational-quadratic kernel is an isotropic Mercer kernel given by the f ``` where `α` is a shape parameter of the Euclidean distance and `γ` is another shape parameter. 
""" -struct GammaRationalQuadraticKernel{Tr, Tα<:Real, Tγ<:Real} <: Kernel{Tr} - transform::Tr +struct GammaRationalQuadraticKernel{Tα<:Real, Tγ<:Real} <: BaseKernel α::Tα γ::Tγ - function GammaRationalQuadraticKernel{Tr,Tα,Tγ}(t::Tr, α::Tα, γ::Tγ) where {Tr, Tα<:Real, Tγ<:Real} + function GammaRationalQuadraticKernel(;α::Tα=2.0, γ::Tγ=2.0) where {Tα<:Real, Tγ<:Real} @check_args(GammaRationalQuadraticKernel, α, α > one(Tα), "α > 1") @check_args(GammaRationalQuadraticKernel, γ, γ >= one(Tγ), "γ >= 1") - return new{Tr, Tα, Tγ}(t, α, γ) + return new{Tα, Tγ}(α, γ) end end -function GammaRationalQuadraticKernel(ρ::Real=1.0, α::Real=2.0, γ::Real=2.0) - GammaRationalQuadraticKernel(ScaleTransform(ρ), α, γ) -end - -function GammaRationalQuadraticKernel(ρ::AbstractVector{<:Real}, α::Real=2.0, γ::Real=2.0) - GammaRationalQuadraticKernel(ARDTransform(ρ),α,γ) -end - -function GammaRationalQuadraticKernel(t::Tr,α::Tα=2.0,γ::Tγ=2.0) where {Tr<:Transform, Tα<:Real, Tγ<:Real} - GammaRationalQuadraticKernel{Tr, Tα, Tγ}(t, α, γ) -end - -params(k::GammaRationalQuadraticKernel) = (params(k.transform),k.α,k.γ) -opt_params(k::GammaRationalQuadraticKernel) = (opt_params(k.transform),k.α,k.γ) +params(k::GammaRationalQuadraticKernel) = (k.α,k.γ) +opt_params(k::GammaRationalQuadraticKernel) = (k.α,k.γ) -@inline kappa(κ::GammaRationalQuadraticKernel, d²::T) where {T<:Real} = (one(T)+d²^κ.γ/κ.α)^(-κ.α) +kappa(κ::GammaRationalQuadraticKernel, d²::T) where {T<:Real} = (one(T)+d²^κ.γ/κ.α)^(-κ.α) metric(::GammaRationalQuadraticKernel) = SqEuclidean() diff --git a/src/kernels/scaledkernel.jl b/src/kernels/scaledkernel.jl new file mode 100644 index 000000000..8f85df249 --- /dev/null +++ b/src/kernels/scaledkernel.jl @@ -0,0 +1,25 @@ +struct ScaledKernel{Tk<:Kernel,Tσ<:Real} <: Kernel + kernel::Tk + σ::Vector{Tσ} +end + +function ScaledKernel(kernel::Tk,σ::Tσ=1.0) where {Tk<:Kernel,Tσ<:Real} + @check_args(ScaledKernel, σ, σ > zero(Tσ), "σ > 0") + ScaledKernel{Tk,Tσ}(kernel,[σ]) +end + 
+kappa(k::ScaledKernel, x) = first(k.σ)*kappa(k.kernel, x) + +metric(k::ScaledKernel) = metric(k.kernel) + +params(k::ScaledKernel) = (k.σ,params(k.kernel)) +opt_params(k::ScaledKernel) = (k.σ,opt_params(k.kernel)) + +Base.:*(w::Real,k::Kernel) = ScaledKernel(k,w) + +Base.show(io::IO,κ::ScaledKernel) = printshifted(io,κ,0) + +function printshifted(io::IO,κ::ScaledKernel,shift::Int) + printshifted(io,κ.kernel,shift) + print(io,"\n"*("\t"^(shift+1))*"- σ = $(first(κ.σ))") +end diff --git a/src/kernels/transformedkernel.jl b/src/kernels/transformedkernel.jl new file mode 100644 index 000000000..ef02be638 --- /dev/null +++ b/src/kernels/transformedkernel.jl @@ -0,0 +1,37 @@ +struct TransformedKernel{Tk<:Kernel,Tr<:Transform} <: Kernel + kernel::Tk + transform::Tr +end + +""" +```julia + transform(k::BaseKernel, t::Transform) (1) + transform(k::BaseKernel, ρ::Real) (2) + transform(k::BaseKernel, ρ::AbstractVector) (3) +``` +(1) Create a TransformedKernel with transform `t` and kernel `k` +(2) Same as (1) with a `ScaleTransform` with scale `ρ` +(3) Same as (1) with an `ARDTransform` with scales `ρ` +""" +transform + +transform(k::BaseKernel, t::Transform) = TransformedKernel(k, t) + +transform(k::BaseKernel, ρ::Real) = TransformedKernel(k, ScaleTransform(ρ)) + +transform(k::BaseKernel,ρ::AbstractVector) = TransformedKernel(k, ARDTransform(ρ)) + +kernel(κ) = κ.kernel + +kappa(κ::TransformedKernel, x) = kappa(κ.kernel, x) + +metric(κ::TransformedKernel) = metric(κ.kernel) + +params(κ::TransformedKernel) = (params(κ.transform),params(κ.kernel)) + +Base.show(io::IO,κ::TransformedKernel) = printshifted(io,κ,0) + +function printshifted(io::IO,κ::TransformedKernel,shift::Int) + printshifted(io,κ.kernel,shift) + print(io,"\n"*("\t"^(shift+1))*"- $(κ.transform)") +end diff --git a/src/matrix/kernelmatrix.jl b/src/matrix/kernelmatrix.jl index 6c91e232c..e01572164 100644 --- a/src/matrix/kernelmatrix.jl +++ b/src/matrix/kernelmatrix.jl @@ -18,9 +18,12 @@ function kernelmatrix!( if 
!check_dims(K,X,X,feature_dim(obsdim),obsdim) throw(DimensionMismatch("Dimensions of the target array K $(size(K)) are not consistent with X $(size(X))")) end - map!(x->kappa(κ,x),K,pairwise(metric(κ),transform(κ,X,obsdim),dims=obsdim)) + map!(x->kappa(κ,x),K,pairwise(metric(κ),X,dims=obsdim)) end +kernelmatrix!(K::Matrix, κ::TransformedKernel, X::AbstractMatrix; obsdim::Int = defaultobs) = + kernelmatrix!(K, kernel(κ), apply(κ.transform, X, obsdim = obsdim), obsdim = obsdim) + function kernelmatrix!( K::AbstractMatrix, κ::Kernel, @@ -32,9 +35,12 @@ function kernelmatrix!( if !check_dims(K,X,Y,feature_dim(obsdim),obsdim) throw(DimensionMismatch("Dimensions $(size(K)) of the target array K are not consistent with X ($(size(X))) and Y ($(size(Y)))")) end - map!(x->kappa(κ,x),K,pairwise(metric(κ),transform(κ,X,obsdim),transform(κ,Y,obsdim),dims=obsdim)) + map!(x->kappa(κ,x),K,pairwise(metric(κ),X,Y,dims=obsdim)) end +kernelmatrix!(K::AbstractMatrix, κ::TransformedKernel, X::AbstractMatrix, Y::AbstractMatrix; obsdim::Int = defaultobs) = + kernelmatrix!(K, kernel(κ), apply(κ.transform, X, obsdim = obsdim), apply(κ.transform, Y, obsdim = obsdim), obsdim = obsdim) + ## Apply kernel on two reals ## function _kernel(κ::Kernel, x::Real, y::Real) _kernel(κ, [x], [y]) @@ -48,9 +54,12 @@ function _kernel( obsdim::Int = defaultobs ) @assert length(x) == length(y) "x and y don't have the same dimension!" 
- kappa(κ, evaluate(metric(κ),transform(κ,x),transform(κ,y))) + kappa(κ, evaluate(metric(κ),x,y)) end +_kernel(κ::TransformedKernel, x::AbstractVector, y::AbstractVector; obsdim::Int = defaultobs) = + _kernel(kernel(κ), apply(κ.transform, x), apply(κ.transform, y), obsdim = obsdim) + """ ``` kernelmatrix(κ::Kernel, X::Matrix ; obsdim::Int=2) @@ -74,10 +83,13 @@ function kernelmatrix( κ::Kernel, X::AbstractMatrix; obsdim::Int = defaultobs - ) - K = map(x->kappa(κ,x),pairwise(metric(κ),transform(κ,X,obsdim),dims=obsdim)) + ) + K = map(x->kappa(κ,x),pairwise(metric(κ),X,dims=obsdim)) end +kernelmatrix(κ::TransformedKernel, X::AbstractMatrix; obsdim::Int = defaultobs) = + kernelmatrix(kernel(κ), apply(κ.transform, X, obsdim = obsdim), obsdim = obsdim) + function kernelmatrix( κ::Kernel, X::AbstractMatrix, @@ -91,7 +103,10 @@ function kernelmatrix( _kernelmatrix(κ,X,Y,obsdim) end -@inline _kernelmatrix(κ,X,Y,obsdim) = map(x->kappa(κ,x),pairwise(metric(κ),transform(κ,X,obsdim),transform(κ,Y,obsdim),dims=obsdim)) +@inline _kernelmatrix(κ::Kernel,X,Y,obsdim) = map(x->kappa(κ,x),pairwise(metric(κ),X,Y,dims=obsdim)) + +kernelmatrix(κ::TransformedKernel, X::AbstractMatrix, Y::AbstractMatrix; obsdim::Int = defaultobs) = + kernelmatrix(kernel(κ), apply(κ.transform, X, obsdim = obsdim), apply(κ.transform, Y, obsdim = obsdim), obsdim = obsdim) """ ``` diff --git a/src/transform/ardtransform.jl b/src/transform/ardtransform.jl index 8aceba177..0f4b23e9b 100644 --- a/src/transform/ardtransform.jl +++ b/src/transform/ardtransform.jl @@ -28,13 +28,13 @@ end params(t::ARDTransform) = t.v dim(t::ARDTransform) = length(t.v) -function transform(t::ARDTransform,X::AbstractMatrix{<:Real},obsdim::Int) +function apply(t::ARDTransform,X::AbstractMatrix{<:Real};obsdim::Int = defaultobs) @boundscheck if dim(t) != size(X,feature_dim(obsdim)) throw(DimensionMismatch("Array has size $(size(X,!Bool(obsdim-1)+1)) on dimension $(!Bool(obsdim-1)+1)) which does not match the length of the scale 
transform length , $(dim(t)).")) #TODO Add test end _transform(t,X,obsdim) end -transform(t::ARDTransform,x::AbstractVector{<:Real},obsdim::Int=defaultobs) = t.v .* x +apply(t::ARDTransform,x::AbstractVector{<:Real};obsdim::Int=defaultobs) = t.v .* x _transform(t::ARDTransform,X::AbstractMatrix{<:Real},obsdim::Int=defaultobs) = obsdim == 1 ? t.v'.*X : t.v .* X Base.isequal(t::ARDTransform,t2::ARDTransform) = isequal(t.v,t2.v) diff --git a/src/transform/chaintransform.jl b/src/transform/chaintransform.jl index 6c3485d18..91d8880b8 100644 --- a/src/transform/chaintransform.jl +++ b/src/transform/chaintransform.jl @@ -17,15 +17,16 @@ function ChainTransform(v::AbstractVector{<:Transform}) ChainTransform(v) end +## Constructor to create a chain transform with an array of parameters function ChainTransform(v::AbstractVector{<:Type{<:Transform}},θ::AbstractVector) @assert length(v) == length(θ) ChainTransform(v.(θ)) end -function transform(t::ChainTransform,X::T,obsdim::Int=defaultobs) where {T} +function apply(t::ChainTransform,X::T;obsdim::Int=defaultobs) where {T} Xtr = copy(X) for tr in t.transforms - Xtr = transform(tr,Xtr,obsdim) + Xtr = apply(tr, Xtr, obsdim = obsdim) end return Xtr end diff --git a/src/transform/functiontransform.jl b/src/transform/functiontransform.jl index b69d27c99..7f78398c3 100644 --- a/src/transform/functiontransform.jl +++ b/src/transform/functiontransform.jl @@ -12,7 +12,7 @@ struct FunctionTransform{F} <: Transform f::F end -transform(t::FunctionTransform,X::T,obsdim::Int=defaultobs) where {T} = mapslices(t.f,X,dims=feature_dim(obsdim)) +apply(t::FunctionTransform, X::T; obsdim::Int = defaultobs) where {T} = mapslices(t.f, X, dims = feature_dim(obsdim)) duplicate(t::FunctionTransform,f) = FunctionTransform(f) params(t::FunctionTransform) = t.f diff --git a/src/transform/lowranktransform.jl b/src/transform/lowranktransform.jl index 313f38cd6..af8541996 100644 --- a/src/transform/lowranktransform.jl +++ b/src/transform/lowranktransform.jl 
@@ -21,13 +21,13 @@ params(t::LowRankTransform) = t.proj Base.size(tr::LowRankTransform,i::Int) = size(tr.proj,i) Base.size(tr::LowRankTransform) = size(tr.proj) # TODO Add test -function transform(t::LowRankTransform,X::AbstractMatrix{<:Real},obsdim::Int=defaultobs) +function apply(t::LowRankTransform, X::AbstractMatrix{<:Real}; obsdim::Int = defaultobs) @boundscheck size(t,2) != size(X,feature_dim(obsdim)) ? throw(DimensionMismatch("The projection matrix has size $(size(t)) and cannot be used on X with dimensions $(size(X))")) : nothing @inbounds _transform(t,X,obsdim) end -function transform(t::LowRankTransform,x::AbstractVector{<:Real},obsdim::Int=defaultobs) #TODO Add test +function apply(t::LowRankTransform, x::AbstractVector{<:Real}; obsdim::Int = defaultobs) #TODO Add test @assert size(t,2) == length(x) "Vector has wrong dimensions $(length(x)) compared to projection matrix" t.proj*x end diff --git a/src/transform/scaletransform.jl b/src/transform/scaletransform.jl index 9a71e6010..c7d57835b 100644 --- a/src/transform/scaletransform.jl +++ b/src/transform/scaletransform.jl @@ -16,9 +16,11 @@ function ScaleTransform(s::T=1.0) where {T<:Real} end set!(t::ScaleTransform,ρ::Real) = t.s .= [ρ] -params(t::ScaleTransform) = first(t.s) +params(t::ScaleTransform) = t.s dim(str::ScaleTransform) = 1 -transform(t::ScaleTransform,x::AbstractVecOrMat,obsdim::Int=defaultobs) = first(t.s) * x +apply(t::ScaleTransform,x::AbstractVecOrMat;obsdim::Int=defaultobs) = first(t.s) * x Base.isequal(t::ScaleTransform,t2::ScaleTransform) = isequal(first(t.s),first(t2.s)) + +Base.show(io::IO,t::ScaleTransform) = print(io,"Scale Transform s=$(first(t.s))") diff --git a/src/transform/selecttransform.jl b/src/transform/selecttransform.jl index 4151c6591..9e8770139 100644 --- a/src/transform/selecttransform.jl +++ b/src/transform/selecttransform.jl @@ -29,13 +29,13 @@ duplicate(t::SelectTransform,θ) = t Base.maximum(t::SelectTransform) = maximum(t.select) -function 
transform(t::SelectTransform,X::AbstractMatrix{<:Real},obsdim::Int=defaultobs) +function apply(t::SelectTransform, X::AbstractMatrix{<:Real}; obsdim::Int = defaultobs) @boundscheck maximum(t) >= size(X,feature_dim(obsdim)) ? throw(DimensionMismatch("The highest index $(maximum(t)) is higher then the feature dimension of X : $(size(X,feature_dim(obsdim)))")) : nothing @inbounds _transform(t,X,obsdim) end -function transform(t::SelectTransform,x::AbstractVector{<:Real},obsdim::Int=defaultobs) #TODO Add test +function apply(t::SelectTransform, x::AbstractVector{<:Real}; obsdim::Int = defaultobs) #TODO Add test @assert maximum(t) <= length(x) "The highest index $(maximum(t)) is higher then the vector length : $(length(x))" return @inbounds view(x,t.select) end diff --git a/src/transform/transform.jl b/src/transform/transform.jl index 5c70c1302..6b2c7b988 100644 --- a/src/transform/transform.jl +++ b/src/transform/transform.jl @@ -1,14 +1,4 @@ export Transform, IdentityTransform, ScaleTransform, ARDTransform, LowRankTransform, FunctionTransform, ChainTransform -export transform - -""" -```julia - transform(t::Transform, X::AbstractMatrix) - transform(k::Kernel, X::AbstractMatrix) -``` -Apply the transfomration `t` or `k.transform` on the input `X` -""" -transform include("scaletransform.jl") include("ardtransform.jl") @@ -24,9 +14,8 @@ Return exactly the input struct IdentityTransform <: Transform end params(t::IdentityTransform) = nothing -duplicate(t::IdentityTransform,θ) = t -transform(t::IdentityTransform, x, obsdim::Int=defaultobs) = x #TODO add test +apply(t::IdentityTransform, x; obsdim::Int=defaultobs) = x #TODO add test ### TODO Maybe defining adjoints could help but so far it's not working diff --git a/src/utils.jl b/src/utils.jl index d2ce93e2f..c291e402f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -28,25 +28,7 @@ feature_dim(obsdim::Int) = obsdim == 1 ? 
2 : 1 base_kernel(k::Kernel) = eval(nameof(typeof(k))) -base_transform(k::Kernel) = base_transform(transform(k)) base_transform(t::Transform) = eval(nameof(typeof(t))) -_tail(v::AbstractVector) = view(v,2:length(v)) - -""" -```julia - duplicate(k::Kernel,θ) - duplicate(t::Transform,θ) -``` -Recreate a kernel (transform) with the same structure as `k` (`t`) with the appropriate new parameters `θ`. -`theta` should have the same structure then the one given by `params(k)` (`params(t)`). -""" -duplicate - -duplicate(k::Kernel,θ::AbstractVector) = base_kernel(k)(duplicate(transform(k),first(θ)),_tail(θ)...) -duplicate(k::Kernel,θ::Tuple) = base_kernel(k)(duplicate(transform(k),first(θ)),Base.tail(θ)...) -duplicate(t::Transform,θ) = base_transform(t)(θ) - -dim(k::Kernel) = length(params(k)) """ ```julia diff --git a/test/test_constructors.jl b/test/test_constructors.jl index 8c1724e0f..978786084 100644 --- a/test/test_constructors.jl +++ b/test/test_constructors.jl @@ -5,26 +5,47 @@ l = 2.0 vl = [l,l] s = ScaleTransform(l) +## Add tests for Transformed Kernel and Scaled Kernel + ## SqExponentialKernel -@testset "SqExponentialKernel" begin - @test KernelFunctions.metric(SqExponentialKernel(l)) == SqEuclidean() - @test isequal(transform(SqExponentialKernel(l)),s) - @test KernelFunctions.transform(SqExponentialKernel(vl)) == ARDTransform(vl) - @test isequal(KernelFunctions.transform(SqExponentialKernel(s)),s) +@testset "Exponential" begin + @test KernelFunctions.metric(ExponentialKernel()) == Euclidean() + @test KernelFunctions.metric(SqExponentialKernel()) == SqEuclidean() + @test KernelFunctions.metric(GammaExponentialKernel()) == SqEuclidean() + @test KernelFunctions.metric(GammaExponentialKernel(γ=2.0)) == SqEuclidean() end ## MaternKernel @testset "MaternKernel" begin - @test KernelFunctions.metric(MaternKernel(l)) == Euclidean() - @test KernelFunctions.metric(Matern32Kernel(l)) == Euclidean() - @test KernelFunctions.metric(Matern52Kernel(l)) == Euclidean() - @test 
isequal(KernelFunctions.transform(MaternKernel(l)),s) - @test isequal(KernelFunctions.transform(Matern32Kernel(l)),s) - @test isequal(KernelFunctions.transform(Matern52Kernel(l)),s) - @test KernelFunctions.transform(MaternKernel(vl)) == ARDTransform(vl) - @test KernelFunctions.transform(Matern32Kernel(vl)) == ARDTransform(vl) - @test KernelFunctions.transform(Matern52Kernel(vl)) == ARDTransform(vl) - @test KernelFunctions.transform(MaternKernel(s)) == s - @test KernelFunctions.transform(Matern32Kernel(s)) == s - @test KernelFunctions.transform(Matern52Kernel(s)) == s + @test KernelFunctions.metric(MaternKernel()) == Euclidean() + @test KernelFunctions.metric(MaternKernel(ν=2.0)) == Euclidean() + @test KernelFunctions.metric(Matern32Kernel()) == Euclidean() + @test KernelFunctions.metric(Matern52Kernel()) == Euclidean() +end + +@testset "Exponentiated" begin + @test KernelFunctions.metric(ExponentiatedKernel()) == KernelFunctions.DotProduct() +end + +@testset "Constant" begin + @test KernelFunctions.metric(ConstantKernel()) == KernelFunctions.Delta() + @test KernelFunctions.metric(ConstantKernel(c=2.0)) == KernelFunctions.Delta() + @test KernelFunctions.metric(WhiteKernel()) == KernelFunctions.Delta() + @test KernelFunctions.metric(ZeroKernel()) == KernelFunctions.Delta() +end + +@testset "Polynomial" begin + @test KernelFunctions.metric(LinearKernel()) == KernelFunctions.DotProduct() + @test KernelFunctions.metric(LinearKernel(c=2.0)) == KernelFunctions.DotProduct() + @test KernelFunctions.metric(PolynomialKernel()) == KernelFunctions.DotProduct() + @test KernelFunctions.metric(PolynomialKernel(d=3.0)) == KernelFunctions.DotProduct() + @test KernelFunctions.metric(PolynomialKernel(d=3.0,c=2.0)) == KernelFunctions.DotProduct() +end + +@testset "RationalQuadratic" begin + @test KernelFunctions.metric(RationalQuadraticKernel()) == SqEuclidean() + @test KernelFunctions.metric(RationalQuadraticKernel(α=2.0)) == SqEuclidean() + @test 
KernelFunctions.metric(GammaRationalQuadraticKernel()) == SqEuclidean() + @test KernelFunctions.metric(GammaRationalQuadraticKernel(γ=2.0)) == SqEuclidean() + @test KernelFunctions.metric(GammaRationalQuadraticKernel(γ=2.0,α=3.0)) == SqEuclidean() end diff --git a/test/test_custom.jl b/test/test_custom.jl index cd8c8db50..4edb29a1e 100644 --- a/test/test_custom.jl +++ b/test/test_custom.jl @@ -2,11 +2,10 @@ using KernelFunctions using Test # minimal definition of a custom kernel -struct MyKernel <: Kernel{IdentityTransform} end +struct MyKernel <: Kernel end KernelFunctions.kappa(::MyKernel, d2::Real) = exp(-d2) KernelFunctions.metric(::MyKernel) = SqEuclidean() -KernelFunctions.transform(::MyKernel) = IdentityTransform() @test kappa(MyKernel(), 3) == kappa(SqExponentialKernel(), 3) @test kappa(MyKernel(), 1, 3) == kappa(SqExponentialKernel(), 1, 3) @@ -23,4 +22,4 @@ KernelFunctions.transform(::MyKernel) = IdentityTransform() @test MyKernel()(3) == SqExponentialKernel()(3) @test MyKernel()([1, 2], [3, 4]) == SqExponentialKernel()([1, 2], [3, 4]) @test MyKernel()([1 2; 3 4], [5 6; 7 8]) == SqExponentialKernel()([1 2; 3 4], [5 6; 7 8]) -@test MyKernel()([1 2; 3 4]) == SqExponentialKernel()([1 2; 3 4]) \ No newline at end of file +@test MyKernel()([1 2; 3 4]) == SqExponentialKernel()([1 2; 3 4]) diff --git a/test/test_kernelmatrix.jl b/test/test_kernelmatrix.jl index 0ab67adc6..742d9f43a 100644 --- a/test/test_kernelmatrix.jl +++ b/test/test_kernelmatrix.jl @@ -10,7 +10,9 @@ B = rand(dims...) 
C = rand(8,9) K = [zeros(dims[1],dims[1]),zeros(dims[2],dims[2])] Kdiag = [zeros(dims[1]),zeros(dims[2])] +s = rand() k = SqExponentialKernel() +kt = transform(SqExponentialKernel(),s) @testset "Kernel Matrix Operations" begin @testset "Inplace Kernel Matrix" begin for obsdim in [1,2] @@ -33,6 +35,22 @@ k = SqExponentialKernel() @test_throws DimensionMismatch kernelmatrix(k,A,C,obsdim=obsdim) end end + @testset "Transformed Kernel Matrix Operations" begin + @testset "Inplace Kernel Matrix" begin + for obsdim in [1,2] + @test kernelmatrix!(K[obsdim],kt,A,B,obsdim=obsdim) == kernelmatrix(k,s*A,s*B,obsdim=obsdim) + @test kernelmatrix!(K[obsdim],kt,A,obsdim=obsdim) == kernelmatrix(k,s*A,obsdim=obsdim) + @test kerneldiagmatrix!(Kdiag[obsdim],kt,A,obsdim=obsdim) == kerneldiagmatrix(k,s*A,obsdim=obsdim) + end + end + @testset "Kernel matrix" begin + for obsdim in [1,2] + @test kernelmatrix(kt,A,B,obsdim=obsdim) == kernelmatrix(k,s*A,s*B,obsdim=obsdim) + @test kernelmatrix(kt,A,obsdim=obsdim) == kernelmatrix(k,s*A,obsdim=obsdim) + @test kerneldiagmatrix(kt,A,obsdim=obsdim) == kerneldiagmatrix(k,s*A,obsdim=obsdim) + end + end + end @testset "KernelSum" begin k1 = SqExponentialKernel() k2 = LinearKernel() @@ -52,7 +70,6 @@ k = SqExponentialKernel() k3 = RationalQuadraticKernel() kp = k1 * k2 kp2 = k1 * k3 - @test all(KernelFunctions.metric(kp).==[KernelFunctions.metric(k1),KernelFunctions.metric(k2)]) @test all(kernelmatrix(kp,A) .≈ kernelmatrix(k1,A) .* kernelmatrix(k2,A)) @test all(kernelmatrix(kp*k1,A) .≈ kernelmatrix(k1,A).^2 .* kernelmatrix(k2,A)) @test all(kernelmatrix(k1*kp,A) .≈ kernelmatrix(k1,A).^2 .* kernelmatrix(k2,A)) diff --git a/test/test_kernels.jl b/test/test_kernels.jl index 5eb9202c0..1046004db 100644 --- a/test/test_kernels.jl +++ b/test/test_kernels.jl @@ -19,10 +19,8 @@ x = rand()*2; v1 = rand(3); v2 = rand(3); id = IdentityTransform() end @testset "ConstantKernel" begin c = 2.0 - k = ConstantKernel(c) - k₂ = ConstantKernel(IdentityTransform(),c) + k = 
ConstantKernel(c=c) @test eltype(k) == Any - @test kappa(k,1.5)== kappa(k₂,1.5) @test kappa(k,1.0) == c @test kappa(k,0.5) == c end @@ -32,40 +30,23 @@ x = rand()*2; v1 = rand(3); v2 = rand(3); id = IdentityTransform() k = SqExponentialKernel() @test kappa(k,x) ≈ exp(-x) @test k(v1,v2) ≈ exp(-norm(v1-v2)^2) - @test kappa(SqExponentialKernel(id),x) == kappa(k,x) - l = 0.5 - k = SqExponentialKernel(l) - @test k(v1,v2) ≈ exp(-l^2*norm(v1-v2)^2) - v = rand(3) - k = SqExponentialKernel(v) - @test k(v1,v2) ≈ exp(-norm(v.*(v1-v2))^2) + @test kappa(SqExponentialKernel(),x) == kappa(k,x) end @testset "ExponentialKernel" begin k = ExponentialKernel() @test kappa(k,x) ≈ exp(-x) @test k(v1,v2) ≈ exp(-norm(v1-v2)) - @test kappa(ExponentialKernel(id),x) == kappa(k,x) - l = 0.5 - k = ExponentialKernel(l) - @test k(v1,v2) ≈ exp(-l*norm(v1-v2)) - v = rand(3) - k = ExponentialKernel(v) - @test k(v1,v2) ≈ exp(-norm(v.*(v1-v2))) + @test kappa(ExponentialKernel(),x) == kappa(k,x) end @testset "GammaExponentialKernel" begin - k = GammaExponentialKernel(1.0,2.0) - @test kappa(k,x) ≈ exp(-(x)^(k.γ)) - @test k(v1,v2) ≈ exp(-norm(v1-v2)^(2k.γ)) - @test kappa(GammaExponentialKernel(id),x) == kappa(k,x) - l = 0.5 - k = GammaExponentialKernel(l,1.5) - @test k(v1,v2) ≈ exp(-l^(3.0)*norm(v1-v2)^(3.0)) - v = rand(3) - k = GammaExponentialKernel(v,3.0) - @test k(v1,v2) ≈ exp(-norm(v.*(v1-v2)).^6.0) + γ = 2.0 + k = GammaExponentialKernel(γ=γ) + @test kappa(k,x) ≈ exp(-(x)^(γ)) + @test k(v1,v2) ≈ exp(-norm(v1-v2)^(2γ)) + @test kappa(GammaExponentialKernel(),x) == kappa(k,x) #Coherence : - @test KernelFunctions._kernel(GammaExponentialKernel(1.0,1.0),v1,v2) ≈ KernelFunctions._kernel(SqExponentialKernel(),v1,v2) - @test KernelFunctions._kernel(GammaExponentialKernel(1.0,0.5),v1,v2) ≈ KernelFunctions._kernel(ExponentialKernel(),v1,v2) + @test KernelFunctions._kernel(GammaExponentialKernel(γ=1.0),v1,v2) ≈ KernelFunctions._kernel(SqExponentialKernel(),v1,v2) + @test 
KernelFunctions._kernel(GammaExponentialKernel(γ=0.5),v1,v2) ≈ KernelFunctions._kernel(ExponentialKernel(),v1,v2) end end @testset "Exponentiated" begin @@ -74,57 +55,33 @@ x = rand()*2; v1 = rand(3); v2 = rand(3); id = IdentityTransform() @test kappa(k,x) ≈ exp(x) @test kappa(k,-x) ≈ exp(-x) @test k(v1,v2) ≈ exp(dot(v1,v2)) - l = 0.5 - k = ExponentiatedKernel(l) - @test k(v1,v2) ≈ exp(l^2*dot(v1,v2)) - v = rand(3) - k = ExponentiatedKernel(v) - @test k(v1,v2) ≈ exp(dot(v.*v1,v.*v2)) end end @testset "Matern" begin @testset "MaternKernel" begin ν = 2.0 - k = MaternKernel(1.0,ν) + k = MaternKernel(ν=ν) matern(x,ν) = 2^(1-ν)/gamma(ν)*(sqrt(2ν)*x)^ν*besselk(ν,sqrt(2ν)*x) @test kappa(k,x) ≈ matern(x,ν) @test kappa(k,0.0) == 1.0 - @test kappa(MaternKernel(id,ν),x) == kappa(k,x) - l = 0.5; ν = 3.0 - k = MaternKernel(l,ν) - @test k(v1,v2) ≈ matern(l*norm(v1-v2),ν) - v = rand(3); ν = 2.1 - k = MaternKernel(v,ν) - @test k(v1,v2) ≈ matern(norm(v.*(v1-v2)),ν) + @test kappa(MaternKernel(ν=ν),x) == kappa(k,x) end @testset "Matern32Kernel" begin k = Matern32Kernel() @test kappa(k,x) ≈ (1+sqrt(3)*x)exp(-sqrt(3)*x) @test k(v1,v2) ≈ (1+sqrt(3)*norm(v1-v2))exp(-sqrt(3)*norm(v1-v2)) - @test kappa(Matern32Kernel(id),x) == kappa(k,x) - l = 0.5 - k = Matern32Kernel(l) - @test k(v1,v2) ≈ (1+l*sqrt(3)*norm(v1-v2))exp(-l*sqrt(3)*norm(v1-v2)) - v = rand(3) - k = Matern32Kernel(v) - @test k(v1,v2) ≈ (1+sqrt(3)*norm(v.*(v1-v2)))exp(-sqrt(3)*norm(v.*(v1-v2))) + @test kappa(Matern32Kernel(),x) == kappa(k,x) end @testset "Matern52Kernel" begin k = Matern52Kernel() @test kappa(k,x) ≈ (1+sqrt(5)*x+5/3*x^2)exp(-sqrt(5)*x) @test k(v1,v2) ≈ (1+sqrt(5)*norm(v1-v2)+5/3*norm(v1-v2)^2)exp(-sqrt(5)*norm(v1-v2)) - @test kappa(Matern52Kernel(id),x) == kappa(k,x) - l = 0.5 - k = Matern52Kernel(l) - @test k(v1,v2) ≈ (1+l*sqrt(5)*norm(v1-v2)+l^2*5/3*norm(v1-v2)^2)exp(-l*sqrt(5)*norm(v1-v2)) - v = rand(3) - k = Matern52Kernel(v) - @test k(v1,v2) ≈ 
(1+sqrt(5)*norm(v.*(v1-v2))+5/3*norm(v.*(v1-v2))^2)exp(-sqrt(5)*norm(v.*(v1-v2))) + @test kappa(Matern52Kernel(),x) == kappa(k,x) end @testset "Coherence Materns" begin - @test kappa(MaternKernel(1.0,0.5),x) ≈ kappa(ExponentialKernel(),x) - @test kappa(MaternKernel(1.0,1.5),x) ≈ kappa(Matern32Kernel(),x) - @test kappa(MaternKernel(1.0,2.5),x) ≈ kappa(Matern52Kernel(),x) + @test kappa(MaternKernel(ν=0.5),x) ≈ kappa(ExponentialKernel(),x) + @test kappa(MaternKernel(ν=1.5),x) ≈ kappa(Matern32Kernel(),x) + @test kappa(MaternKernel(ν=2.5),x) ≈ kappa(Matern52Kernel(),x) end end @testset "Polynomial" begin @@ -133,28 +90,15 @@ x = rand()*2; v1 = rand(3); v2 = rand(3); id = IdentityTransform() k = LinearKernel() @test kappa(k,x) ≈ x @test k(v1,v2) ≈ dot(v1,v2) - @test kappa(LinearKernel(id),x) == kappa(k,x) - l = 0.5 - k = LinearKernel(l,c) - @test k(v1,v2) ≈ l^2*dot(v1,v2) + c - v = rand(3) - k = LinearKernel(v,c) - @test k(v1,v2) ≈ dot(v.*v1,v.*v2) + c + @test kappa(LinearKernel(),x) == kappa(k,x) end @testset "PolynomialKernel" begin k = PolynomialKernel() @test kappa(k,x) ≈ x^2 @test k(v1,v2) ≈ dot(v1,v2)^2 - @test kappa(PolynomialKernel(id),x) == kappa(k,x) - d = 3.0 - l = 0.5 - k = PolynomialKernel(l,d,c) - @test k(v1,v2) ≈ (l^2*dot(v1,v2) + c)^d - v = rand(3) - k = PolynomialKernel(v,d,c) - @test k(v1,v2) ≈ (dot(v.*v1,v.*v2) + c)^d + @test kappa(PolynomialKernel(),x) == kappa(k,x) #Coherence test - @test kappa(PolynomialKernel(1.0,1.0,c),x) ≈ kappa(LinearKernel(1.0,c),x) + @test kappa(PolynomialKernel(d=1.0,c=c),x) ≈ kappa(LinearKernel(c=c),x) end end @testset "RationalQuadratic" begin @@ -162,46 +106,58 @@ x = rand()*2; v1 = rand(3); v2 = rand(3); id = IdentityTransform() k = RationalQuadraticKernel() @test kappa(k,x) ≈ (1.0+x/2.0)^-2 @test k(v1,v2) ≈ (1.0+norm(v1-v2)^2/2.0)^-2 - @test kappa(RationalQuadraticKernel(id),x) == kappa(k,x) - l = 0.5 - a = 1.0 + rand() - k = RationalQuadraticKernel(l,a) - @test k(v1,v2) ≈ (1.0+l^2*norm(v1-v2)^2/a)^-a - v = rand(3) - k = 
RationalQuadraticKernel(v,a) - @test k(v1,v2) ≈ (1.0+norm(v.*(v1-v2))^2/a)^-a + @test kappa(RationalQuadraticKernel(),x) == kappa(k,x) end @testset "GammaRationalQuadraticKernel" begin k = GammaRationalQuadraticKernel() @test kappa(k,x) ≈ (1.0+x^2.0/2.0)^-2 @test k(v1,v2) ≈ (1.0+norm(v1-v2)^4.0/2.0)^-2 - @test kappa(GammaRationalQuadraticKernel(id),x) == kappa(k,x) - l = 0.5 + @test kappa(GammaRationalQuadraticKernel(),x) == kappa(k,x) a = 1.0 + rand() - g = 4.0 - k = GammaRationalQuadraticKernel(l,a,g) - @test k(v1,v2) ≈ (1.0+(l^2g)*norm(v1-v2)^(2g)/a)^-a - v = rand(3) - k = GammaRationalQuadraticKernel(v,a,g) - @test k(v1,v2) ≈ (1.0+(norm(v.*(v1-v2))^(2g))/a)^-a #Coherence test - @test kappa(GammaRationalQuadraticKernel(1.0,a,1.0),x) ≈ kappa(RationalQuadraticKernel(1.0,a),x) + @test kappa(GammaRationalQuadraticKernel(α=a,γ=1.0),x) ≈ kappa(RationalQuadraticKernel(α=a),x) end end + @testset "Transformed/Scaled Kernel" begin + s = rand() + v = rand(3) + k = SqExponentialKernel() + kt = TransformedKernel(k,ScaleTransform(s)) + ktard = TransformedKernel(k,ARDTransform(v)) + ks = ScaledKernel(k,s) + @test kappa(kt,v1,v2) == kappa(transform(k,ScaleTransform(s)),v1,v2) + @test kappa(kt,v1,v2) == kappa(transform(k,s),v1,v2) + @test kappa(kt,v1,v2) == kappa(k,s*v1,s*v2) + @test kappa(ktard,v1,v2) == kappa(transform(k,ARDTransform(v)),v1,v2) + @test kappa(ktard,v1,v2) == kappa(transform(k,v),v1,v2) + @test kappa(ktard,v1,v2) == kappa(k,v.*v1,v.*v2) + @test KernelFunctions.metric(kt) == KernelFunctions.metric(k) + @test kappa(ks,x) == s*kappa(k,x) + @test kappa(ks,x) == kappa(s*k,x) + end @testset "KernelCombinations" begin k1 = LinearKernel() k2 = SqExponentialKernel() + k3 = RationalQuadraticKernel() X = rand(2,2) @testset "KernelSum" begin - k = k1 + k2 - @test KernelFunctions.metric(k) == [KernelFunctions.DotProduct(),KernelFunctions.SqEuclidean()] + w = [2.0,0.5] + k = KernelSum([k1,k2],w) + ks1 = 2.0*k1 + ks2 = 0.5*k2 @test length(k) == 2 - @test transform(k) == 
[transform(k1),transform(k2)] - @test transform(k,X) == [transform(k1,X),transform(k2,X)] - @test transform(k,X,1) == [transform(k1,X,1),transform(k2,X,1)] + @test kappa(k,v1,v2) == kappa(2.0*k1+0.5*k2,v1,v2) + @test kappa(k+k3,v1,v2) ≈ kappa(k3+k,v1,v2) + @test kappa(k1+k2,v1,v2) == kappa(KernelSum([k1,k2]),v1,v2) + @test kappa(k+ks1,v1,v2) ≈ kappa(ks1+k,v1,v2) + @test kappa(k+k,v1,v2) == kappa(KernelSum([k1,k2,k1,k2],vcat(w,w)),v1,v2) end @testset "KernelProduct" begin - + k = KernelProduct([k1,k2]) + @test length(k) == 2 + @test kappa(k,v1,v2) == kappa(k1*k2,v1,v2) + @test kappa(k*k,v1,v2) ≈ kappa(k,v1,v2)^2 + @test kappa(k*k3,v1,v2) ≈ kappa(k3*k,v1,v2) end end end diff --git a/test/test_transform.jl b/test/test_transform.jl index 511b6613e..2d6dc4c5e 100644 --- a/test/test_transform.jl +++ b/test/test_transform.jl @@ -16,9 +16,12 @@ f(x) = sin.(x) @testset "Transform Test" begin ## Test Scale Transform + @testset "IdentityTransform" begin + @test KernelFunctions.apply(IdentityTransform(),X)==X + end @testset "ScaleTransform" begin t = ScaleTransform(s) - @test all(KernelFunctions.transform(t,X).==s*X) + @test all(KernelFunctions.apply(t,X).==s*X) s2 = 2.0 KernelFunctions.set!(t,s2) @test all(t.s.==[s2]) @@ -27,14 +30,18 @@ f(x) = sin.(x) @testset "ARDTransform" begin vt1 = ARDTransform(v1) vt2 = ARDTransform(v2) - @test all(KernelFunctions.transform(vt1,X,1).==v1'.*X) - @test all(KernelFunctions.transform(vt2,X,2).==v2.*X) + @test all(KernelFunctions.apply(vt1,X,obsdim=1).==v1'.*X) + @test all(KernelFunctions.apply(vt2,X,obsdim=2).==v2.*X) + newv1 = rand(5) + KernelFunctions.set!(vt1,newv1) + @test all(vt1.v .== newv1) + @test_throws DimensionMismatch KernelFunctions.apply(vt1,rand(3,4)) end ## Test LowRankTransform @testset "LowRankTransform" begin tp = LowRankTransform(P) - @test all(KernelFunctions.transform(tp,X,2).==P*X) - @test all(KernelFunctions.transform(tp,x).==P*x) + @test all(KernelFunctions.apply(tp,X,obsdim=2).==P*X) + @test 
all(KernelFunctions.apply(tp,x).==P*x) @test all(KernelFunctions.params(tp).==P) P2 = rand(5,10) KernelFunctions.set!(tp,P2) @@ -43,15 +50,14 @@ f(x) = sin.(x) ## Test FunctionTransform @testset "FunctionTransform" begin tf = FunctionTransform(f) - KernelFunctions.transform(tf,X,1) - @test all(KernelFunctions.transform(tf,X,1).==f(X)) + KernelFunctions.apply(tf,X,obsdim=1) + @test all(KernelFunctions.apply(tf,X,obsdim=1).==f(X)) end ## Test SelectTransform @testset "SelectTransform" begin ts = SelectTransform(sdims) - @test all(KernelFunctions.transform(ts,X,2).==X[sdims,:]) - @test all(KernelFunctions.transform(ts,x).==x[sdims]) - @test all(KernelFunctions.params(ts).==sdims) + @test all(KernelFunctions.apply(ts,X,obsdim=2).==X[sdims,:]) + @test all(KernelFunctions.apply(ts,x).==x[sdims]) sdims2 = [2,3,5] KernelFunctions.set!(ts,sdims2) @test all(ts.select.==sdims2) @@ -62,8 +68,8 @@ f(x) = sin.(x) tp = LowRankTransform(P) tf = FunctionTransform(f) tchain = ChainTransform([t,tp,tf]) - @test all(KernelFunctions.transform(tchain,X,2).==f(P*(s*X))) - @test all(KernelFunctions.transform(tchain,X,2).== - KernelFunctions.transform(tf∘tp∘t,X,2)) + @test all(KernelFunctions.apply(tchain,X,obsdim=2).==f(P*(s*X))) + @test all(KernelFunctions.apply(tchain,X,obsdim=2).== + KernelFunctions.apply(tf∘tp∘t,X,obsdim=2)) end end