From 8698011019a289fdf763fb764850387fbc758af5 Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 22 Sep 2020 11:56:02 +0200 Subject: [PATCH 1/5] Improve temporal: identify non-date sampling --- src/physical_dimensions/temporal.jl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/physical_dimensions/temporal.jl b/src/physical_dimensions/temporal.jl index dd6daffb..6bab9ea0 100644 --- a/src/physical_dimensions/temporal.jl +++ b/src/physical_dimensions/temporal.jl @@ -90,7 +90,6 @@ function time_in_days(t::AbstractArray{<:TimeType}, T = Float32) end time_in_days(t::AbstractArray{<:Real}) = t - """ temporal_sampling(x) → symbol Return the temporal sampling type of `x`, which is either an array of `Date`s or @@ -100,6 +99,7 @@ Possible return values are: - `:yearly`, where all dates have the same month+day, but different year. - `:monthly`, where all dates have the same day, but different month. - `:daily`, where the temporal difference between dates are exactly 1 day. +- `:hourly`, where the temporal difference between entries is exactly 1 hour. - `:other`, which means that `x` doesn't fall to any of the above categories. For vector input, only the first 3 entries of the temporal information are used @@ -107,6 +107,8 @@ to deduce the sampling (while for ranges, checking the step is enough). """ temporal_sampling(A::AbDimArray) = temporal_sampling(dims(A, Time).val) function temporal_sampling(t::AbstractVector{<:TimeType}) + #TODO: implement hourly! 
+ sampled_less_than_date(t) && error("Hourly sampling not yet implemented") d1 = daymonth(t[2]) .- daymonth(t[1]) d2 = daymonth(t[3]) .- daymonth(t[2]) samemonth = d1[2] == d1[2] == 0 @@ -128,8 +130,24 @@ temporal_sampling(t::AbstractVector) = error("Need `<:TimeType` elements.") temporal_sampling(t::StepRange{<:Any,Month}) = :monthly temporal_sampling(t::StepRange{<:Any,Year}) = :yearly temporal_sampling(t::StepRange{<:Any,Day}) = :daily +temporal_sampling(t::StepRange{<:Any,Hour}) = :hourly temporal_sampling(t::StepRange{<:Any,<:Any}) = :other +"return true if hours or minutes are ≠ 0." +function sampled_less_than_date(t::AbstractVector{<:DateTime}) + r = 1:length(t) + any(i -> Dates.hour(t[i]) ≠ 0, r) || any(i -> Dates.minute(t[i]) ≠ 0, r) +end +sampled_less_than_date(t::AbstractVector{<:Date}) = false + +"return the appropriate subtype of Dates.Period." +function tsamp2period(tsamp) + tsamp == :monthly && return Month(1) + tsamp == :yearly && return Year(1) + tsamp == :daily && return Day(1) + error("Don't know the period of $tsamp sampling!") +end + ######################################################################### # temporal statistics ######################################################################### From 06a4521286e39112d675aa2e00f6ef36a9c5e309 Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 22 Sep 2020 11:56:11 +0200 Subject: [PATCH 2/5] add functions that transform vectors to ranges --- src/core/loading_nc.jl | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/core/loading_nc.jl b/src/core/loading_nc.jl index 37e8cdd9..bbd7717e 100644 --- a/src/core/loading_nc.jl +++ b/src/core/loading_nc.jl @@ -71,15 +71,16 @@ end # TODO: Allow this function to take as input a tuple of indices, e.g. (:, :, 1:5) # and only load this part, and correctly and instantly make it a ClimArray, which -# can solve "large memory" or "large data" problems. +# can solve "large memory" or "large data" problems. 
This functionality +# must be sure to load the correct ranges of dimensions as well though! + function ClimArray(ds::NCDatasets.AbstractDataset, var::String; eqarea = false) svar = string(var) cfvar = ds[svar] attrib = Dict(cfvar.attrib) A = cfvar |> Array if eqarea - # TODO: I have to re-work this code to be more general and allow other dimensions - # as well!!!! + # TODO: This piece of code is specific to CDO output... if haskey(ds, "ncells") # this is the equal area grid, so we make a Coord dimension lon = ds["lon"] |> Array .|> wrap_lon lat = ds["lat"] |> Array @@ -119,6 +120,35 @@ function create_dims(ds::NCDatasets.AbstractDataset, dnames) return dim_values .|> true_dims end +######################################################################### +# Making vectors → ranges +######################################################################### +function vector2range(x::Vector{<:Real}) + dx = x[2]-x[1] + for i in 3:length(x) + x[i]-x[i-1] ≠ dx && return x # if no constant step, return array as is + end + r = x[1]:dx:x[end] + @assert r == x + return r +end + +function vector2range(t::Vector{<:DateTime}) + !sampled_less_than_date(t) && return vector2range(Date.(t)) + # TODO: implement hourly sampling here + @warn "Hourly sampling not yet implemented." 
+ return t +end + +function vector2range(t::Vector{<:Date}) + tsamp = temporal_sampling(t) + period = tsamp2period(tsamp) + r = t[1]:period:t[end] + @assert r == t + return r +end + + ######################################################################### # Equal area related ######################################################################### From b0f49665815858201fecdbaaadad37a67d12e6be Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 22 Sep 2020 12:07:04 +0200 Subject: [PATCH 3/5] incorporate vector2range in loading .nc files --- src/core/aggregation.jl | 8 +------- src/core/coredefs.jl | 2 ++ src/core/loading_nc.jl | 13 +++++++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/core/aggregation.jl b/src/core/aggregation.jl index f3ef1adf..3957e419 100644 --- a/src/core/aggregation.jl +++ b/src/core/aggregation.jl @@ -54,13 +54,7 @@ function dropagg(f, A::AbDimArray, dims) DimensionalData.rebuild(r, Array(r.data)) end -function dimindex(A::AbDimArray, Dim) - @assert hasdim(A, Dim) - return findfirst(x -> x isa Dim, dims(A)) -end - -Base.ones(A::AbDimArray) = basetypeof(A)(ones(size(A)), dims(A)) - +dimindex(A::AbDimArray, Dim) = DimensionalData.dimnum(A, Dim) ######################################################################### # Other dimensions diff --git a/src/core/coredefs.jl b/src/core/coredefs.jl index c0bc8283..dccdfefa 100644 --- a/src/core/coredefs.jl +++ b/src/core/coredefs.jl @@ -133,3 +133,5 @@ function Base.summary(io::IO, A::ClimArray) print(io, '\n') end end + +Base.ones(A::AbDimArray) = basetypeof(A)(ones(size(A)), dims(A)) diff --git a/src/core/loading_nc.jl b/src/core/loading_nc.jl index bbd7717e..2cbf383d 100644 --- a/src/core/loading_nc.jl +++ b/src/core/loading_nc.jl @@ -44,9 +44,13 @@ A = ClimArray(file, "tow_sw_all") ``` (of course you can just do `NCDataset("file.nc")` for single files). 
-If there are no missing values in the data (according to CF standards), the -returned array is automatically converted to a concrete type (i.e. `Union{Float32, Missing}` -becomes `Float32`). +We do two performance improvements while loading the data: +1. If there are no missing values in the data (according to CF standards), the + returned array is automatically converted to a concrete type (i.e. `Union{Float32, Missing}` + becomes `Float32`). +2. Dimensions that are ranges (i.e. sampled with constant step size) are automatically + transformed to a standard Julia `Range` type (which makes sub-selecting faster). + At the moment, support for auto-loading equal area space types does not exist, see [Types of spatial coordinates](@ref). But @@ -117,7 +121,8 @@ Create a tuple of `Dimension`s from the `dnames` (tuple of strings). function create_dims(ds::NCDatasets.AbstractDataset, dnames) true_dims = getindex.(Ref(COMMONNAMES), dnames) dim_values = Array.(getindex.(Ref(ds), dnames)) - return dim_values .|> true_dims + optimal_values = vector2range.(dim_values) + return optimal_values .|> true_dims end ######################################################################### From 20f1e840208ab1759de6cf9de5138b55909d3df4 Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 22 Sep 2020 14:08:22 +0200 Subject: [PATCH 4/5] small bugfix in time weighting --- src/physical_dimensions/temporal.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/physical_dimensions/temporal.jl b/src/physical_dimensions/temporal.jl index 6bab9ea0..98348467 100644 --- a/src/physical_dimensions/temporal.jl +++ b/src/physical_dimensions/temporal.jl @@ -253,7 +253,7 @@ function timeagg(f, T::AbstractVector{<:TimeType}, a::Vector, w = nothing) # ver mys = maxyearspan(T, tsamp) t = view(T, 1:mys) if tsamp == :monthly - dimw = daysinmonth.(t) + dimw = float.(daysinmonth.(t)) !isnothing(w) && (dimw .*= view(w, 1:mys)) return f(view(a, 1:mys), weights(dimw)) else From 
e117533b0b0fe1622a711d1c11ee3847b4e54f01 Mon Sep 17 00:00:00 2001 From: Datseris Date: Tue, 22 Sep 2020 14:19:13 +0200 Subject: [PATCH 5/5] quick todo note --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index e3ba85ac..2b509a00 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -88,6 +88,8 @@ end res = timeagg(mean, A, w) @test all(res .≈ A[Time(5)]) @test dims(A, Lon) == dims(res, Lon) + + # TODO: more tests needed here, e.g. for timeagg(mean, t, a, w) end @testset "Advanced temporal manipulation" begin