diff --git a/Project.toml b/Project.toml index f0ef692..7021572 100644 --- a/Project.toml +++ b/Project.toml @@ -21,6 +21,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +TableTransforms = "0d432bfd-3ee1-4ac1-886a-39f05cc69a3e" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" @@ -49,6 +50,7 @@ Random = "1.9" Setfield = "1.0" StaticArrays = "1.9" Statistics = "1.9" +TableTransforms = "1.33" Tables = "1.6" Unitful = "1.11" julia = "1.9" diff --git a/ext/transioplot.jl b/ext/transioplot.jl index 3b53093..cdd2d58 100644 --- a/ext/transioplot.jl +++ b/ext/transioplot.jl @@ -2,16 +2,81 @@ # Licensed under the MIT License. See LICENSE in the project root. # ------------------------------------------------------------------ +# ---------- +# EMPIRICAL +# ---------- + +function transioplot( + t::EmpiricalTransiogram; + # common transiogram options + color=:slategray, + size=1.5, + maxlag=nothing, + levels=nothing, + + # empirical transiogram options + pointsize=12, + showtext=true, + textsize=12, + showhist=true, + histcolor=:slategray +) + # number of labels + L = Base.size(t.ordinates, 1) + + # retrieve labels + l = isnothing(levels) ? 
(1:L) : levels + + fig = Makie.Figure() + for i in 1:L, j in 1:L + lᵢ, lⱼ = l[i], l[j] + ax = Makie.Axis(fig[i, j]) + + # retrieve coordinates and counts + x = t.abscissas + y = t.ordinates[i, j] + n = t.counts + + # discard empty bins + x = x[n .> 0] + y = y[n .> 0] + n = n[n .> 0] + + # visualize frequencies as bars + if showhist + f = n * (maximum(y) / maximum(n)) / 10 + Makie.barplot!(ax, x, f, color=histcolor, alpha=0.3, gap=0.0) + end + + # visualize transiogram + Makie.scatterlines!(ax, x, y, color=color, markersize=pointsize, linewidth=size, label="$lᵢ → $lⱼ") + + # visualize text counts + if showtext + text = string.(n) + Makie.text!(ax, x, y, text=text, fontsize=textsize) + end + + Makie.axislegend(position=i == j ? :rt : :rb) + end + fig +end + +# ------------ +# THEORETICAL +# ------------ + function transioplot( t::Transiogram; # common transiogram options color=:slategray, size=1.5, - maxlag=nothing + maxlag=nothing, + levels=nothing ) # effective ranges and labels r = GeoStatsFunctions.ranges(t) - l = GeoStatsFunctions.levels(t) + l = isnothing(levels) ? (1:length(r)) : levels # number of labels L = length(l) diff --git a/ext/varioplot.jl b/ext/varioplot.jl index 4d034d6..ed25342 100644 --- a/ext/varioplot.jl +++ b/ext/varioplot.jl @@ -35,10 +35,9 @@ function Makie.plot!(plot::VarioPlot{<:Tuple{EmpiricalVariogram}}) γ = plot[:γ] # get the data - xyn = Makie.@lift values($γ) - x = Makie.@lift $xyn[1] - y = Makie.@lift $xyn[2] - n = Makie.@lift $xyn[3] + x = Makie.@lift $γ.abscissas + y = Makie.@lift $γ.ordinates + n = Makie.@lift $γ.counts # discard empty bins x = Makie.@lift $x[$n .> 0] @@ -78,43 +77,44 @@ function Makie.plot!(plot::VarioPlot{<:Tuple{EmpiricalVarioplane}}) θs = Makie.@lift $v.θs # polar radius - rs = Makie.@lift ustrip.(values($γs[1])[1]) - - # variogram values for all variograms - Z = Makie.@lift let - zs = map($γs) do γ - zs = ustrip.(values(γ)[2]) - - # handle NaN values (i.e. 
empty bins) - isnan(zs[1]) && (zs[1] = 0) - for i in 2:length(zs) - isnan(zs[i]) && (zs[i] = zs[i - 1]) + rs = Makie.@lift ustrip.($γs[1].abscissas) + + # variogram ordinates for all variograms + H = Makie.@lift let + hs = map($γs) do γ + # retrieve ordinates without units + ys = ustrip.(γ.ordinates) + + # handle NaN ordinates (i.e. empty bins) + isnan(ys[1]) && (ys[1] = 0) + for i in 2:length(ys) + isnan(ys[i]) && (ys[i] = ys[i - 1]) end - zs + ys end - reduce(hcat, zs) + reduce(hcat, hs) end # exploit symmetry θs = Makie.@lift range(0, 2π, length=2 * length($θs)) - Z = Makie.@lift [$Z $Z] + H = Makie.@lift [$H $H] # hide hole at center rs = Makie.@lift [0; $rs] - Z = Makie.@lift [$Z[1:1, :]; $Z] + H = Makie.@lift [$H[1:1, :]; $H] # transpose for plotting - Z = Makie.@lift transpose($Z) + H = Makie.@lift transpose($H) - Makie.surface!(plot, θs, rs, Z, colormap=plot[:colormap], shading=Makie.NoShading) + Makie.surface!(plot, θs, rs, H, colormap=plot[:colormap], shading=Makie.NoShading) # show model range if showrange[] ls = Makie.@lift [ustrip(range(GeoStatsFunctions.fit($rangemodel, γ))) for γ in $γs] ls = Makie.@lift [$ls; $ls] - zs = Makie.@lift fill(maximum($Z) + 1, length($ls)) - Makie.lines!(plot, θs, ls, zs, color=plot[:rangecolor]) + hs = Makie.@lift fill(maximum($H) + 1, length($ls)) + Makie.lines!(plot, θs, ls, hs, color=plot[:rangecolor]) end end diff --git a/src/GeoStatsFunctions.jl b/src/GeoStatsFunctions.jl index 7d6c84f..4ddc58e 100644 --- a/src/GeoStatsFunctions.jl +++ b/src/GeoStatsFunctions.jl @@ -15,6 +15,7 @@ using InteractiveUtils: subtypes using NearestNeighbors: MinkowskiMetric using OhMyThreads: tmapreduce using DataScienceTraits +using TableTransforms using CategoricalArrays using StaticArrays using LinearAlgebra @@ -26,7 +27,6 @@ using Printf import Base: merge, +, * import Meshes: isisotropic -import CategoricalArrays: levels # temporary fix for ⋅ with missing values # https://github.com/JuliaLang/julia/issues/40743 @@ -36,10 +36,10 @@ 
import LinearAlgebra: ⋅ # utilities include("utils.jl") -# empirical estimates +# empirical functions include("empirical.jl") -# theoretical models +# theoretical functions include("theoretical.jl") # misc operations @@ -49,15 +49,16 @@ include("plotting.jl") include("precompile.jl") export - # empirical variograms + # empirical functions EmpiricalVariogram, - EmpiricalVarioplane, + EmpiricalTransiogram, + + # convenience functions DirectionalVariogram, PlanarVariogram, - distance, - estimator, + EmpiricalVarioplane, - # theoretical function + # theoretical functions GeoStatsFunction, isisotropic, metricball, @@ -96,7 +97,6 @@ export ExponentialTransiogram, # fitting algorithms - VariogramFitAlgo, WeightedLeastSquares, # plotting diff --git a/src/empirical.jl b/src/empirical.jl index 526aade..335ab3c 100644 --- a/src/empirical.jl +++ b/src/empirical.jl @@ -15,5 +15,14 @@ include("empirical/estimalgo.jl") # END-USER TYPES AND FUNCTIONS # ----------------------------- +""" + EmpiricalFunction + +An empirical function estimated from data. +""" +abstract type EmpiricalFunction end + include("empirical/variogram.jl") +include("empirical/transiogram.jl") + include("empirical/varioplane.jl") diff --git a/src/empirical/algorithms.jl b/src/empirical/algorithms.jl index d9d03db..417dc64 100644 --- a/src/empirical/algorithms.jl +++ b/src/empirical/algorithms.jl @@ -3,20 +3,20 @@ # ------------------------------------------------------------------ """ - AccumAlgorithm + AccumAlgo Algorithm used for accumulating values in the estimation of geostatistical functions. """ -abstract type AccumAlgorithm end +abstract type AccumAlgo end """ - accumulate(data, vars, estimator, algo) + accumulate(data, pairs, estimator, algo) -Accumulate values for pairs of variables `vars` stored +Accumulate values for `pairs` of variables stored in `data` with `estimator` and accumulation `algo`. 
""" -function accumulate(data, vars, estimator::Estimator, algo::AccumAlgorithm) +function accumulate(data, (var₁, var₂), estimator::Estimator, algo::AccumAlgo) # retrieve algorithm parameters nlags = algo.nlags maxlag = algo.maxlag @@ -29,10 +29,15 @@ function accumulate(data, vars, estimator::Estimator, algo::AccumAlgorithm) 𝒯 = values(data) 𝒫 = domain(data) - # vectors for variables + # table columns cols = Tables.columns(𝒯) - z₁ = Tables.getcolumn(cols, Symbol(vars[1])) - z₂ = Tables.getcolumn(cols, Symbol(vars[2])) + + # get column from variable name + get(var) = Tables.getcolumn(cols, Symbol(var)) + + # vectors for variables + z₁ = get(var₁) + z₂ = get(var₂) # neighbors function neighbors = neighfun(algo, 𝒫) @@ -43,27 +48,23 @@ function accumulate(data, vars, estimator::Estimator, algo::AccumAlgorithm) # early exit condition exit = exitfun(algo) - # accumulation type - V = returntype(estimator, z₁, z₂) - - # lag sums and counts + # lag counts and abscissa sums ℒ = Meshes.lentype(𝒫) ns = zeros(Int, nlags) Σx = zeros(ℒ, nlags) + + # ordinate sums + V = returntype(estimator, z₁, z₂) Σy = zeros(V, nlags) - # loop over points inside ball + # loop over pairs of points @inbounds for j in 1:nelements(𝒫) pⱼ = 𝒫[j] - z₁ⱼ = z₁[j] - z₂ⱼ = z₂[j] for i in neighbors(j) # skip to avoid double counting skip(i, j) && continue pᵢ = 𝒫[i] - z₁ᵢ = z₁[i] - z₂ᵢ = z₂[i] # evaluate geospatial lag h = evaluate(distance, pᵢ, pⱼ) @@ -71,37 +72,143 @@ function accumulate(data, vars, estimator::Estimator, algo::AccumAlgorithm) # early exit if out of range exit(h) && continue - # evaluate (cross-)variance - v = formula(estimator, z₁ᵢ, z₁ⱼ, z₂ᵢ, z₂ⱼ) - # bin (or lag) where to accumulate result lag = ceil(Int, h / δh) lag == 0 && @warn "duplicate coordinates found, consider using `UniqueCoords`" - if 0 < lag ≤ nlags && !ismissing(v) - ns[lag] += 1 - Σx[lag] += h - Σy[lag] += v + # accumulate if lag is valid + if 0 < lag ≤ nlags + # evaluate function estimator + v = formula(estimator, z₁[i], 
z₁[j], z₂[i], z₂[j]) + + # accumulate if value is valid + if !ismissing(v) + ns[lag] += 1 + Σx[lag] += h + Σy[lag] += v + end + end end end - # bin (or lag) size - lags = range(δh / 2, stop=maxlag - δh / 2, length=nlags) - # ordinate function ordfun(Σy, n) = normsum(estimator, Σy, n) - # variogram abscissa + # bin (or lag) size + lags = range(δh / 2, stop=maxlag - δh / 2, length=nlags) + + # abscissa xs = @. Σx / ns xs[ns .== 0] .= lags[ns .== 0] - # variogram ordinate + # ordinate ys = @. ordfun(Σy, ns) ys[ns .== 0] .= zero(eltype(ys)) ns, xs, ys end +function accumulate(data, pairs, estimator::CarleEstimator, algo::AccumAlgo) + # retrieve algorithm parameters + nlags = algo.nlags + maxlag = algo.maxlag + distance = algo.distance + + # compute lag size + δh = maxlag / nlags + + # table and point set + 𝒯 = values(data) + 𝒫 = domain(data) + + # table columns + cols = Tables.columns(𝒯) + + # get column from variable name + get(var) = Tables.getcolumn(cols, Symbol(var)) + + # neighbors function + neighbors = neighfun(algo, 𝒫) + + # skip condition + skip = skipfun(algo) + + # early exit condition + exit = exitfun(algo) + + # lag counts and abscissa sums + ℒ = Meshes.lentype(𝒫) + ns = zeros(Int, nlags) + Σx = zeros(ℒ, nlags) + + # ordinate sums + Σ = map(pairs) do (var₁, var₂) + z₁ = get(var₁) + z₂ = get(var₂) + V = returntype(estimator, z₁, z₂) + zeros(V, nlags) + end + + # loop over pairs of points + @inbounds for j in 1:nelements(𝒫) + pⱼ = 𝒫[j] + for i in neighbors(j) + # skip to avoid double counting + skip(i, j) && continue + + pᵢ = 𝒫[i] + + # evaluate geospatial lag + h = evaluate(distance, pᵢ, pⱼ) + + # early exit if out of range + exit(h) && continue + + # bin (or lag) where to accumulate result + lag = ceil(Int, h / δh) + lag == 0 && @warn "duplicate coordinates found, consider using `UniqueCoords`" + + # if lag is valid, count the pair of points once (not once per pair of variables) + if 0 < lag ≤ nlags + ns[lag] += 1 + Σx[lag] += h + for (k, (var₁, var₂)) in enumerate(pairs) + # retrieve values and sums for pair + z₁ = get(var₁) + z₂ = 
get(var₂) + Σy = Σ[k] + + # evaluate function estimator + v = formula(estimator, z₁[i], z₁[j], z₂[i], z₂[j]) + + # accumulate ordinate sums if value is valid + if all(!ismissing, v) + Σy[lag] += v + end + end + end + end + end + + # ordinate function + ordfun(Σy, n) = normsum(estimator, Σy, n) + + # bin (or lag) size + lags = range(δh / 2, stop=maxlag - δh / 2, length=nlags) + + # abscissa + xs = @. Σx / ns + xs[ns .== 0] .= lags[ns .== 0] + + # ordinate + Y = map(Σ) do Σy + ys = @. ordfun(Σy, ns) + ys[ns .== 0] .= zero(eltype(ys)) + ys + end + + ns, xs, Y +end + include("algorithms/fullsearch.jl") include("algorithms/ballsearch.jl") diff --git a/src/empirical/algorithms/ballsearch.jl b/src/empirical/algorithms/ballsearch.jl index 9801019..2106acc 100644 --- a/src/empirical/algorithms/ballsearch.jl +++ b/src/empirical/algorithms/ballsearch.jl @@ -8,7 +8,7 @@ Accumulate pairs of points in geospatial data with nearest neighbors inside metric ball. """ -struct BallSearchAccum{ℒ<:Len,D} <: AccumAlgorithm +struct BallSearchAccum{ℒ<:Len,D} <: AccumAlgo nlags::Int maxlag::ℒ distance::D diff --git a/src/empirical/algorithms/fullsearch.jl b/src/empirical/algorithms/fullsearch.jl index 7079394..c36b221 100644 --- a/src/empirical/algorithms/fullsearch.jl +++ b/src/empirical/algorithms/fullsearch.jl @@ -8,7 +8,7 @@ Accumulate pairs of points in geospatial data with exhaustive (or full) search. 
""" -struct FullSearchAccum{ℒ<:Len,D} <: AccumAlgorithm +struct FullSearchAccum{ℒ<:Len,D} <: AccumAlgo nlags::Int maxlag::ℒ distance::D diff --git a/src/empirical/estimalgo.jl b/src/empirical/estimalgo.jl index 4bf74c3..508443a 100644 --- a/src/empirical/estimalgo.jl +++ b/src/empirical/estimalgo.jl @@ -12,7 +12,6 @@ function estimalgo(dom, nlags, maxlag, distance, estimator, algorithm) @assert nelements(dom) > 1 "variogram requires at least 2 elements" @assert nlags > 0 "number of lags must be positive" @assert maxlag > zero(maxlag) "maximum lag must be positive" - @assert estimator ∈ (:matheron, :cressie) "invalid empirical estimator" @assert algorithm ∈ (:full, :ball) "invalid accumulation algorithm" # choose empirical estimator @@ -20,6 +19,8 @@ function estimalgo(dom, nlags, maxlag, distance, estimator, algorithm) MatheronEstimator() elseif estimator == :cressie CressieEstimator() + elseif estimator == :carle + CarleEstimator() else throw(ArgumentError("invalid estimator")) end diff --git a/src/empirical/estimators.jl b/src/empirical/estimators.jl index 7a60f07..a12f8e5 100644 --- a/src/empirical/estimators.jl +++ b/src/empirical/estimators.jl @@ -13,3 +13,4 @@ returntype(estim::Estimator, z₁, z₂) = typeof(formula(estim, z₁[1], z₁[2 include("estimators/matheron.jl") include("estimators/cressie.jl") +include("estimators/carle.jl") diff --git a/src/empirical/estimators/carle.jl b/src/empirical/estimators/carle.jl new file mode 100644 index 0000000..30068f1 --- /dev/null +++ b/src/empirical/estimators/carle.jl @@ -0,0 +1,21 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +""" + CarleEstimator() + +Carle's transiogram estimator (equation 10 of Carle, S.F. & Fogg, G.E. 1996). + +## References + +* Carle, S.F. & Fogg, G.E. 1996. 
[Transition probability-based + indicator geostatistics](https://link.springer.com/article/10.1007/BF02083656) +""" +struct CarleEstimator <: Estimator end + +formula(::CarleEstimator, z₁ᵢ, z₁ⱼ, z₂ᵢ, z₂ⱼ) = SVector{2,Int}(z₁ᵢ * z₂ⱼ, z₁ᵢ) + +normsum(::CarleEstimator, Σy, n) = Σy[1] / Σy[2] + +combine(::CarleEstimator, yα, nα, yβ, nβ) = (yα * nα + yβ * nβ) / (nα + nβ) diff --git a/src/empirical/transiogram.jl b/src/empirical/transiogram.jl new file mode 100644 index 0000000..4bd96da --- /dev/null +++ b/src/empirical/transiogram.jl @@ -0,0 +1,76 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +""" + EmpiricalTransiogram(data, var; [parameters]) + +Computes the empirical (a.k.a. experimental) omnidirectional +transiogram for categorical variable `var` stored in geospatial +`data`. + +## Parameters + + * nlags - number of lags (default to `20`) + * maxlag - maximum lag in length units (default to 1/10 of minimum side of bounding box) + * distance - custom distance function (default to `Euclidean` distance) + * algorithm - accumulation algorithm (default to `:ball`) + +Available algorithms: + + * `:full` - loop over all pairs of points in the data + * `:ball` - loop over all points inside maximum lag ball + +All implemented algorithms produce the exact same result. +The `:ball` algorithm is considerably faster when the +maximum lag is much smaller than the bounding box of +the domain of the data. + +See also: [`DirectionalTransiogram`](@ref), [`PlanarTransiogram`](@ref). + +## References + +* Carle, S.F. & Fogg, G.E. 1996. [Transition probability-based + indicator geostatistics](https://link.springer.com/article/10.1007/BF02083656) + +* Carle et al 1998. 
[Conditional Simulation of Hydrofacies Architecture: + A Transition Probability/Markov Approach](https://doi.org/10.2110/sepmcheg.01.147) +""" +struct EmpiricalTransiogram{ℒ<:Len,V,D,E} <: EmpiricalFunction + counts::Vector{Int} + abscissas::Vector{ℒ} + ordinates::Matrix{Vector{V}} + distance::D + estimator::E +end + +function EmpiricalTransiogram( + data::AbstractGeoTable, + var; + nlags=20, + maxlag=defaultmaxlag(data), + distance=Euclidean(), + algorithm=:ball +) + # retrieve table and domain + 𝒯 = values(data) + 𝒟 = domain(data) + + # empirical estimators are defined on point sets + 𝒮 = georef(𝒯, [centroid(𝒟, i) for i in 1:nelements(𝒟)]) + + # transiograms are estimated based on indicators + ℐ = 𝒮 |> OneHot(var) + + # pairs of indicator variables + ivars = ℐ |> values |> Tables.columns |> Tables.columnnames + pairs = Iterators.product(ivars, ivars) |> collect + + # retrieve estimator and algorithm + estim, algo = estimalgo(𝒟, nlags, maxlag, distance, :carle, algorithm) + + # accumulate data with chosen algorithm + counts, abscissas, ordinates = accumulate(ℐ, pairs, estim, algo) + + EmpiricalTransiogram(counts, abscissas, ordinates, distance, estim) +end diff --git a/src/empirical/variogram.jl b/src/empirical/variogram.jl index b8e05ef..509a90e 100644 --- a/src/empirical/variogram.jl +++ b/src/empirical/variogram.jl @@ -46,10 +46,10 @@ See also: [`DirectionalVariogram`](@ref), [`PlanarVariogram`](@ref), * Hoffimann, J and Zadrozny, B. 2019. 
[Efficient variography with partition variograms] (https://www.sciencedirect.com/science/article/pii/S0098300419302936) """ -struct EmpiricalVariogram{ℒ<:Len,V,D,E} +struct EmpiricalVariogram{ℒ<:Len,V,D,E} <: EmpiricalFunction counts::Vector{Int} - abscissa::Vector{ℒ} - ordinate::Vector{V} + abscissas::Vector{ℒ} + ordinates::Vector{V} distance::D estimator::E end @@ -59,7 +59,7 @@ function EmpiricalVariogram( var₁, var₂=var₁; nlags=20, - maxlag=_defaultmaxlag(data), + maxlag=defaultmaxlag(data), distance=Euclidean(), estimator=:matheron, algorithm=:ball @@ -75,9 +75,9 @@ function EmpiricalVariogram( estim, algo = estimalgo(𝒟, nlags, maxlag, distance, estimator, algorithm) # accumulate data with chosen algorithm - counts, abscissa, ordinate = accumulate(𝒮, (var₁, var₂), estim, algo) + counts, abscissas, ordinates = accumulate(𝒮, (var₁, var₂), estim, algo) - EmpiricalVariogram(counts, abscissa, ordinate, distance, estim) + EmpiricalVariogram(counts, abscissas, ordinates, distance, estim) end """ @@ -130,28 +130,6 @@ function PlanarVariogram(normal, data::AbstractGeoTable, var₁, var₂=var₁; EmpiricalVariogram(Π, var₁, var₂; kwargs...) end -""" - values(γ) - -Returns the abscissa, the ordinate, and the bin counts -of the empirical variogram `γ`. -""" -Base.values(γ::EmpiricalVariogram) = γ.abscissa, γ.ordinate, γ.counts - -""" - distance(γ) - -Return the distance used to compute the empirical variogram `γ`. -""" -distance(γ::EmpiricalVariogram) = γ.distance - -""" - estimator(γ) - -Return the estimator used to compute the empirical variogram `γ`. -""" -estimator(γ::EmpiricalVariogram) = γ.estimator - """ merge(γα, γβ) @@ -162,10 +140,10 @@ and estimator. 
function merge(γα::EmpiricalVariogram{V,D,E}, γβ::EmpiricalVariogram{V,D,E}) where {V,D,E} nα = γα.counts nβ = γβ.counts - xα = γα.abscissa - xβ = γβ.abscissa - yα = γα.ordinate - yβ = γβ.ordinate + xα = γα.abscissas + xβ = γβ.abscissas + yα = γα.ordinates + yβ = γβ.ordinates # copy distance and estimator d = γα.distance @@ -193,10 +171,10 @@ end function Base.show(io::IO, γ::EmpiricalVariogram) ioctx = IOContext(io, :compact => true) print(ioctx, "EmpiricalVariogram(") - print(ioctx, "abscissa: ") - _printvec(ioctx, γ.abscissa, 1) - print(ioctx, ", ordinate: ") - _printvec(ioctx, γ.ordinate, 1) + print(ioctx, "abscissas: ") + _printvec(ioctx, γ.abscissas, 1) + print(ioctx, ", ordinates: ") + _printvec(ioctx, γ.ordinates, 1) print(ioctx, ", distance: ", γ.distance) print(ioctx, ", estimator: ", γ.estimator) print(ioctx, ", npairs: ", sum(γ.counts)) @@ -206,10 +184,10 @@ end function Base.show(io::IO, ::MIME"text/plain", γ::EmpiricalVariogram) ioctx = IOContext(io, :compact => true, :limit => true) println(ioctx, "EmpiricalVariogram") - print(ioctx, "├─ abscissa: ") - _printlnvec(ioctx, γ.abscissa, 3) - print(ioctx, "├─ ordinate: ") - _printlnvec(ioctx, γ.ordinate, 3) + print(ioctx, "├─ abscissas: ") + _printlnvec(ioctx, γ.abscissas, 3) + print(ioctx, "├─ ordinates: ") + _printlnvec(ioctx, γ.ordinates, 3) println(ioctx, "├─ distance: ", γ.distance) println(ioctx, "├─ estimator: ", γ.estimator) print(ioctx, "└─ npairs: ", sum(γ.counts)) @@ -219,24 +197,6 @@ end # HELPER FUNCTIONS # ----------------- -_defaultmaxlag(data) = _minside(boundingbox(domain(data))) / 10 - -function _minside(box) - s = _sides(box) - minimum(filter(>(zero(eltype(s))), s)) -end - -_sides(box::Box{<:𝔼}) = sides(box) - -function _sides(box::Box{<:🌐}) - r = vertices(boundary(box)) - s1 = length(Segment(r[1], r[2])) - s2 = length(Segment(r[2], r[3])) - s3 = length(Segment(r[3], r[4])) - s4 = length(Segment(r[4], r[1])) - (s1, s2, s3, s4) -end - function _printlnvec(io, vec, n) _printvec(io, vec, 
n) println(io) diff --git a/src/fitting.jl b/src/fitting.jl index 243a144..5397a0f 100644 --- a/src/fitting.jl +++ b/src/fitting.jl @@ -6,11 +6,11 @@ fittable() = filter(isstationary, setdiff(subtypes(Variogram), (NuggetEffect, NestedVariogram))) """ - VariogramFitAlgo + FitAlgo An algorithm for fitting theoretical variograms. """ -abstract type VariogramFitAlgo end +abstract type FitAlgo end """ WeightedLeastSquares() @@ -20,7 +20,7 @@ Fit theoretical variogram using weighted least squares with weighting function `w` (e.g. h -> 1/h). If no weighting function is provided, bin counts of empirical variogram are normalized and used as weights. """ -struct WeightedLeastSquares <: VariogramFitAlgo +struct WeightedLeastSquares <: FitAlgo weightfun::Union{Function,Nothing} end @@ -45,7 +45,7 @@ julia> fit(ExponentialVariogram, g, maxsill=1.0) julia> fit(GaussianVariogram, g, WeightedLeastSquares()) ``` """ -fit(V::Type{<:Variogram}, g::EmpiricalVariogram, algo::VariogramFitAlgo=WeightedLeastSquares(); kwargs...) = +fit(V::Type{<:Variogram}, g::EmpiricalVariogram, algo::FitAlgo=WeightedLeastSquares(); kwargs...) = fit_impl(V, g, algo; kwargs...) |> first """ @@ -60,7 +60,7 @@ using algorithm `algo` and return the one with minimum error. julia> fit([SphericalVariogram, ExponentialVariogram], g) ``` """ -function fit(Vs, g::EmpiricalVariogram, algo::VariogramFitAlgo=WeightedLeastSquares(); kwargs...) +function fit(Vs, g::EmpiricalVariogram, algo::FitAlgo=WeightedLeastSquares(); kwargs...) # fit each variogram type res = [fit_impl(V, g, algo; kwargs...) for V in Vs] γs, ϵs = first.(res), last.(res) @@ -84,7 +84,7 @@ julia> fit(Variogram, g, WeightedLeastSquares()) See also `GeoStatsFunctions.fittable()`. """ -fit(::Type{Variogram}, g::EmpiricalVariogram, algo::VariogramFitAlgo=WeightedLeastSquares(); kwargs...) = +fit(::Type{Variogram}, g::EmpiricalVariogram, algo::FitAlgo=WeightedLeastSquares(); kwargs...) = fit(fittable(), g, algo; kwargs...) 
""" @@ -118,11 +118,13 @@ function fit_impl( maxsill=nothing, maxnugget=nothing ) - # values of empirical variogram - x, y, n = values(g) - # custom ball of given radius - ball(r) = MetricBall(r, distance(g)) + ball(r) = MetricBall(r, g.distance) + + # coordinates of empirical variogram + x = g.abscissas + y = g.ordinates + n = g.counts # discard invalid bins x = x[n .> 0] diff --git a/src/plotting.jl b/src/plotting.jl index 5f005f4..904013c 100644 --- a/src/plotting.jl +++ b/src/plotting.jl @@ -47,6 +47,7 @@ Plot the transiogram `t` with given `options`. * `color` - color of transiogram * `size` - size of transiogram * `maxlag` - maximum lag of variogram +* `levels` - categorical levels ### Notes diff --git a/src/theoretical/transiogram/exponential.jl b/src/theoretical/transiogram/exponential.jl index ede9843..8535278 100644 --- a/src/theoretical/transiogram/exponential.jl +++ b/src/theoretical/transiogram/exponential.jl @@ -3,15 +3,14 @@ # ------------------------------------------------------------------ """ - ExponentialTransiogram(rate; levels=l) - ExponentialTransiogram(ball, rate; levels=l) + ExponentialTransiogram(rate) + ExponentialTransiogram(ball, rate) An exponential transiogram with transition `rate` matrix. -Optionally, specify a metric `ball` to model anisotropy, -and the `levels` or categories. +Optionally, specify a metric `ball` to model anisotropy. - ExponentialTransiogram(lengths, proportions; levels=l) - ExponentialTransiogram(ball, lengths, proportions; levels=l) + ExponentialTransiogram(lengths, proportions) + ExponentialTransiogram(ball, lengths, proportions) Alternatively, build transition rate matrix from mean `lengths` and relative `proportions`. @@ -24,42 +23,36 @@ and relative `proportions`. * Carle et al 1998. 
[Conditional Simulation of Hydrofacies Architecture: A Transition Probability/Markov Approach](https://doi.org/10.2110/sepmcheg.01.147) """ -struct ExponentialTransiogram{R<:StaticMatrix,L<:AbstractVector,B<:MetricBall} <: Transiogram +struct ExponentialTransiogram{R<:StaticMatrix,B<:MetricBall} <: Transiogram rate::R - levs::L ball::B - function ExponentialTransiogram{R,L,B}(rate, levs, ball) where {R<:StaticMatrix,L<:AbstractVector,B<:MetricBall} + function ExponentialTransiogram{R,B}(rate, ball) where {R<:StaticMatrix,B<:MetricBall} if !allequal(size(rate)) throw(ArgumentError("transition rate matrix must be square")) end - if length(levs) != size(rate, 1) - throw(ArgumentError("levels do not match size of transition rate matrix")) - end - new(rate, levs, ball) + new(rate, ball) end end -function ExponentialTransiogram(ball::MetricBall, rate::AbstractMatrix; levels=1:size(rate, 1)) +function ExponentialTransiogram(ball::MetricBall, rate::AbstractMatrix) srate = SMatrix{size(rate)...}(rate) - ExponentialTransiogram{typeof(srate),typeof(levels),typeof(ball)}(srate, levels, ball) + ExponentialTransiogram{typeof(srate),typeof(ball)}(srate, ball) end -function ExponentialTransiogram(rate::AbstractMatrix; levels=1:size(rate, 1)) +function ExponentialTransiogram(rate::AbstractMatrix) ball = MetricBall(1 / unit(eltype(rate))) - ExponentialTransiogram(ball, rate; levels) + ExponentialTransiogram(ball, rate) end -ExponentialTransiogram(ball::MetricBall, lens::AbstractVector, props::AbstractVector; levels=1:length(lens)) = - ExponentialTransiogram(ball, baseratematrix(lens, props); levels) +ExponentialTransiogram(ball::MetricBall, lens::AbstractVector, props::AbstractVector) = + ExponentialTransiogram(ball, baseratematrix(lens, props)) -ExponentialTransiogram(lens::AbstractVector, props::AbstractVector; levels=1:length(lens)) = - ExponentialTransiogram(baseratematrix(lens, props); levels) +ExponentialTransiogram(lens::AbstractVector, props::AbstractVector) = + 
ExponentialTransiogram(baseratematrix(lens, props)) ranges(t::Transiogram) = 1 ./ -diag(t.rate) -levels(t::ExponentialTransiogram) = t.levs - (t::ExponentialTransiogram)(h) = exp(h * t.rate) # ----------------- diff --git a/src/utils.jl b/src/utils.jl index cc0289a..c151f8c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -12,3 +12,21 @@ function unitless(a::Quantity, b::Quantity) u = Unitful.promote_unit(unit(a), unit(b)) ustrip(u, a), ustrip(u, b) end + +defaultmaxlag(data) = _minside(boundingbox(domain(data))) / 10 + +function _minside(box) + s = _sides(box) + minimum(filter(>(zero(eltype(s))), s)) +end + +_sides(box::Box{<:𝔼}) = sides(box) + +function _sides(box::Box{<:🌐}) + r = vertices(boundary(box)) + s1 = length(Segment(r[1], r[2])) + s2 = length(Segment(r[2], r[3])) + s3 = length(Segment(r[3], r[4])) + s4 = length(Segment(r[4], r[1])) + (s1, s2, s3, s4) +end diff --git a/test/empirical.jl b/test/empirical.jl index 0334fba..620989a 100644 --- a/test/empirical.jl +++ b/test/empirical.jl @@ -2,105 +2,98 @@ @testset "Variogram" begin # homogeneous field has zero variogram sdata = georef((z=ones(3),), [(1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)]) - γ = EmpiricalVariogram(sdata, :z, nlags=2, maxlag=2.0) - x, y, n = values(γ) - @test x ≈ [1 / 2, √2] * u"m" - @test y[2] == 0.0 - @test n == [0, 3] + g = EmpiricalVariogram(sdata, :z, nlags=2, maxlag=2.0) + @test g.abscissas ≈ [1 / 2, √2] * u"m" + @test g.ordinates[2] == 0.0 + @test g.counts == [0, 3] # basic test on number of lags sdata = georef((z=[1.0, 0.0, 1.0],), [(25.0, 25.0), (50.0, 75.0), (75.0, 50.0)]) - γ = EmpiricalVariogram(sdata, :z, nlags=20, maxlag=1.0) - x, y, n = values(γ) - @test length(x) == 20 - @test length(y) == 20 - @test length(n) == 20 + g = EmpiricalVariogram(sdata, :z, nlags=20, maxlag=1.0) + @test length(g.abscissas) == 20 + @test length(g.ordinates) == 20 + @test length(g.counts) == 20 # empirical variogram on integer coordinates sdata = georef((z=ones(3),), [(1, 0, 0), (0, 1, 0), 
(0, 0, 1)]) - γ = EmpiricalVariogram(sdata, :z, nlags=2, maxlag=2, algorithm=:full) - x, y, n = values(γ) - @test x ≈ [1 / 2, √2] * u"m" - @test y[2] == 0.0 - @test n == [0, 3] + g = EmpiricalVariogram(sdata, :z, nlags=2, maxlag=2, algorithm=:full) + @test g.abscissas ≈ [1 / 2, √2] * u"m" + @test g.ordinates[2] == 0.0 + @test g.counts == [0, 3] # empirical variogram with only missing data z = Union{Float64,Missing}[missing, missing, missing] 𝒟 = georef((z=z,), rand(Point, 3)) - γ = EmpiricalVariogram(𝒟, :z, maxlag=1.0, nlags=5) - x, y, n = values(γ) - @test x == [0.1, 0.3, 0.5, 0.7, 0.9] * u"m" - @test all(iszero.(n)) + g = EmpiricalVariogram(𝒟, :z, maxlag=1.0, nlags=5) + @test g.abscissas == [0.1, 0.3, 0.5, 0.7, 0.9] * u"m" + @test all(iszero, g.counts) # accumulation algorithms give the same result rng = StableRNG(123) sdata = georef((z=rand(rng, 1000),), rand(rng, Point, 1000)) - γ₁ = EmpiricalVariogram(sdata, :z, maxlag=0.01, algorithm=:full) - γ₂ = EmpiricalVariogram(sdata, :z, maxlag=0.01, algorithm=:ball) - @test isequal(values(γ₁), values(γ₂)) + g₁ = EmpiricalVariogram(sdata, :z, maxlag=0.01, algorithm=:full) + g₂ = EmpiricalVariogram(sdata, :z, maxlag=0.01, algorithm=:ball) + @test isequal(g₁.abscissas, g₂.abscissas) + @test isequal(g₁.ordinates, g₂.ordinates) + @test isequal(g₁.counts, g₂.counts) # custom distance is recorded rng = StableRNG(123) sdata = georef((z=rand(rng, 2),), [Point(LatLon(0.0, 0.0)), Point(LatLon(0.0, 90.0))]) - γ = EmpiricalVariogram(sdata, :z, distance=Haversine(6371.0), algorithm=:full) - @test distance(γ) == Haversine(6371.0) + g = EmpiricalVariogram(sdata, :z, distance=Haversine(6371.0), algorithm=:full) + @test g.distance == Haversine(6371.0) # print methods rng = StableRNG(123) d = georef((z=rand(rng, 100, 100),)) - γ = EmpiricalVariogram(d, :z) - @test sprint(show, γ) == - "EmpiricalVariogram(abscissa: [0.25 m, ..., 9.93304 m], ordinate: [0.0, ..., 0.0841979], distance: Euclidean(0.0), estimator: MatheronEstimator(), npairs: 
1447200)" - @test sprint(show, MIME"text/plain"(), γ) == """ + g = EmpiricalVariogram(d, :z) + @test sprint(show, g) == + "EmpiricalVariogram(abscissas: [0.25 m, ..., 9.93304 m], ordinates: [0.0, ..., 0.0841979], distance: Euclidean(0.0), estimator: MatheronEstimator(), npairs: 1447200)" + @test sprint(show, MIME"text/plain"(), g) == """ EmpiricalVariogram - ├─ abscissa: [0.25 m, 1.0 m, 1.41421 m, ..., 8.7407 m, 9.28182 m, 9.93304 m] - ├─ ordinate: [0.0, 0.0843099, 0.0845995, ..., 0.0838336, 0.0839823, 0.0841979] + ├─ abscissas: [0.25 m, 1.0 m, 1.41421 m, ..., 8.7407 m, 9.28182 m, 9.93304 m] + ├─ ordinates: [0.0, 0.0843099, 0.0845995, ..., 0.0838336, 0.0839823, 0.0841979] ├─ distance: Euclidean(0.0) ├─ estimator: MatheronEstimator() └─ npairs: 1447200""" # test variography with compositional data data = georef((z=rand(Composition{3}, 100),), rand(Point, 100)) - γ = EmpiricalVariogram(data, :z, maxlag=1.0, algorithm=:full) - x, y, n = values(γ) - @test all(≥(0u"m"), x) - @test all(≥(0), y) - @test all(≥(0), n) + g = EmpiricalVariogram(data, :z, maxlag=1.0, algorithm=:full) + @test all(≥(0u"m"), g.abscissas) + @test all(≥(0), g.ordinates) + @test all(≥(0), g.counts) # test variography with unitful data data = georef((z=[1 * u"K" for i in 1:100],), rand(Point, 100)) - γ = EmpiricalVariogram(data, :z, nlags=20) - x, y, n = values(γ) - @test all(≥(0u"m"), x) - @test y == fill(0.0 * u"K^2", 20) + g = EmpiricalVariogram(data, :z, nlags=20) + @test all(≥(0u"m"), g.abscissas) + @test g.ordinates == fill(0.0 * u"K^2", 20) # Matheron's vs Cressie's estimator img = readdlm(joinpath(datadir, "Gaussian30x10.txt")) data = georef((; Z=img)) - γ₁ = EmpiricalVariogram(data, :Z, maxlag=50.0, estimator=:matheron) - γ₂ = EmpiricalVariogram(data, :Z, maxlag=50.0, estimator=:cressie) - x₁, y₁, n₁ = values(γ₁) - x₂, y₂, n₂ = values(γ₂) - @test x₁ == x₂ - @test all(isapprox.(y₁, y₂, atol=0.1)) - @test n₁ == n₂ + g₁ = EmpiricalVariogram(data, :Z, maxlag=50.0, estimator=:matheron) + g₂ = 
EmpiricalVariogram(data, :Z, maxlag=50.0, estimator=:cressie) + @test g₁.abscissas == g₂.abscissas + @test all(isapprox.(g₁.ordinates, g₂.ordinates, atol=0.1)) + @test g₁.counts == g₂.counts # specify variables as strings img = readdlm(joinpath(datadir, "Gaussian30x10.txt")) data = georef((; Z=img)) - γ = EmpiricalVariogram(data, "Z", maxlag=50.0) - x, y, n = values(γ) - @test all(≥(0u"m"), x) - @test all(>(0.8), y[11:end]) - @test all(≥(0), n) + g = EmpiricalVariogram(data, "Z", maxlag=50.0) + @test all(≥(0u"m"), g.abscissas) + @test all(>(0.8), g.ordinates[11:end]) + @test all(≥(0), g.counts) end @testset "Varioplane" begin img = readdlm(joinpath(datadir, "anisotropic.tsv")) data = georef((z=img,)) - γ = EmpiricalVarioplane(data, :z, maxlag=50.0) - @test sprint(show, γ) == "EmpiricalVarioplane" - @test sprint(show, MIME"text/plain"(), γ) == """ + g = EmpiricalVarioplane(data, :z, maxlag=50.0) + @test sprint(show, g) == "EmpiricalVarioplane" + @test sprint(show, MIME"text/plain"(), g) == """ EmpiricalVarioplane 50 angles └─0.00° @@ -120,19 +113,18 @@ # merge operation does not produce NaN dir = (0.286788, -0.496732, -0.819152) 𝒟 = georef(CSV.File(joinpath(datadir, "nanlags.csv")), (:X, :Y, :Z)) - γ = DirectionalVariogram(dir, 𝒟, :Cu, dtol=45, maxlag=150, nlags=20) - x, y, n = values(γ) - @test !any(isnan.(x)) - @test !any(isnan.(y)) - @test !any(isnan.(n)) + g = DirectionalVariogram(dir, 𝒟, :Cu, dtol=45, maxlag=150, nlags=20) + @test !any(isnan.(g.abscissas)) + @test !any(isnan.(g.ordinates)) + @test !any(isnan.(g.counts)) # directional variogram and known anisotropy ratio img = readdlm(joinpath(datadir, "anisotropic.tsv")) sdata = georef((z=img,)) - γhor = DirectionalVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) - γver = DirectionalVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) - γₕ = GeoStatsFunctions.fit(GaussianVariogram, γhor) - γᵥ = GeoStatsFunctions.fit(GaussianVariogram, γver) + gₕ = DirectionalVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) + gᵥ = 
DirectionalVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) + γₕ = GeoStatsFunctions.fit(GaussianVariogram, gₕ) + γᵥ = GeoStatsFunctions.fit(GaussianVariogram, gᵥ) @test range(γₕ) / range(γᵥ) ≈ 3.0 atol = 0.1 end @@ -140,26 +132,22 @@ # directional equals planar rotated by 90 degrees in 2D img = readdlm(joinpath(datadir, "anisotropic.tsv")) sdata = georef((z=img,)) - γ₁ = PlanarVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) - γ₂ = DirectionalVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) - x₁, y₁, n₁ = values(γ₁) - x₂, y₂, n₂ = values(γ₂) - @test x₁ == x₂ - @test y₁ ≈ y₂ - @test n₁ == n₂ - γ₁ = PlanarVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) - γ₂ = DirectionalVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) - x₁, y₁, n₁ = values(γ₁) - x₂, y₂, n₂ = values(γ₂) - @test x₁ == x₂ - @test y₁ ≈ y₂ - @test n₁ == n₂ + g₁ = PlanarVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) + g₂ = DirectionalVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) + @test g₁.abscissas == g₂.abscissas + @test g₁.ordinates ≈ g₂.ordinates + @test g₁.counts == g₂.counts + g₁ = PlanarVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) + g₂ = DirectionalVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) + @test g₁.abscissas == g₂.abscissas + @test g₁.ordinates ≈ g₂.ordinates + @test g₁.counts == g₂.counts # planar variogram and known anisotropy ratio - γhor = PlanarVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) - γver = PlanarVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) - γₕ = GeoStatsFunctions.fit(GaussianVariogram, γhor) - γᵥ = GeoStatsFunctions.fit(GaussianVariogram, γver) + gₕ = PlanarVariogram((0.0, 1.0), sdata, :z, maxlag=50.0) + gᵥ = PlanarVariogram((1.0, 0.0), sdata, :z, maxlag=50.0) + γₕ = GeoStatsFunctions.fit(GaussianVariogram, gₕ) + γᵥ = GeoStatsFunctions.fit(GaussianVariogram, gᵥ) @test range(γₕ) / range(γᵥ) ≈ 3.0 atol = 0.1 end end