diff --git a/.gitignore b/.gitignore index 21b1ed9f..78bdd411 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.nc Manifest.toml ################################################################################ # DrWatson Project Structure # diff --git a/Project.toml b/Project.toml index 5c6bb5d8..adca4bcc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ClimateBase" uuid = "35604d93-0fb8-4872-9436-495b01d137e2" authors = ["Datseris ", "Philippe Roy "] -version = "0.8.0" +version = "0.9.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/docs/src/index.md b/docs/src/index.md index fb7c936d..448c5eb4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -42,8 +42,8 @@ C = latmean(B) ``` where in this averaging process each data point is weighted by the cosine of its latitude. -## Making a `ClimArray` -You can create a `ClimArray` yourself, or you can load data from an `.nc` file with CF-conventions, using `ClimArray`. +### Making a `ClimArray` +You can create a `ClimArray` yourself, or you can load data from an `.nc` file with CF-conventions, see [NetCDF IO](@ref). ```@docs ClimArray(::AbstractArray, ::Tuple) ``` @@ -56,7 +56,9 @@ end ``` We explicitly assume that `Lon, Lat` are measured in degrees and not radians or meters (extremely important for spatial averaging processes). ---- +## NetCDF IO +ClimateBase.jl has support for `file.nc ⇆ ClimArray`. +To load a `ClimArray` directly from an `.nc` file do: ```@docs ClimArray(::Union{String, Vector{String}}) ``` @@ -73,6 +75,11 @@ nckeys ncdetails ``` +You can also write a bunch of `ClimArray`s directly into an `.nc` file with +```@docs +climarrays_to_nc +``` + ## Temporal Functions related with the `Time` dimension. 
```@docs @@ -112,6 +119,7 @@ spacemean spaceagg hemispheric_means hemispheric_functions +lonlatfirst ``` ## General aggregation diff --git a/src/ClimateBase.jl b/src/ClimateBase.jl index 9f46b3ec..9f8d4ac3 100644 --- a/src/ClimateBase.jl +++ b/src/ClimateBase.jl @@ -2,7 +2,7 @@ module ClimateBase # TODO: Be sure all exported names have docstrings include("core/coredefs.jl") -include("core/loading_nc.jl") +include("core/nc_io.jl") include("core/aggregation.jl") include("physical_dimensions/spatial.jl") diff --git a/src/core/coredefs.jl b/src/core/coredefs.jl index 8c84b68d..cd835a88 100644 --- a/src/core/coredefs.jl +++ b/src/core/coredefs.jl @@ -24,6 +24,10 @@ export EqArea, Grid, spacestructure STANDARD_DIMS = (Lon, Lat, Time, Hei, Pre, Coord) +""" + COMMONNAMES +A dictionary of common names of dimensions (as strings) to actual dimension types. +""" const COMMONNAMES = Dict( "lat" => Lat, "latitude" => Lat, @@ -37,6 +41,7 @@ const COMMONNAMES = Dict( "level" => Pre, ) + # the trait EqArea is for equal area grids. 
Functions can use the `spacestructure` and # dispatch on `EqArea` or other types while still being type-stable struct EqArea end @@ -64,6 +69,7 @@ struct ClimArray{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Me} <: AbstractDime attrib::Me end ClimArray(A::DimensionalArray) = ClimArray(A.data, A.dims, A.refdims, A.name, A.metadata) +ClimArray(A::ClimArray; name = A.name, attrib = A.attrib) = ClimArray(A.data, A.dims, A.refdims, name, attrib) """ ClimArray(A::Array, dims::Tuple; name = "", attrib = nothing) diff --git a/src/core/loading_nc.jl b/src/core/nc_io.jl similarity index 60% rename from src/core/loading_nc.jl rename to src/core/nc_io.jl index 48614243..9e6c5856 100644 --- a/src/core/loading_nc.jl +++ b/src/core/nc_io.jl @@ -1,190 +1,290 @@ -#= -Code related with loading .nc file data directly into a dimensional array -An initial version of parts of this code was taken from: -https://github.com/rafaqz/GeoData.jl -=# -using NCDatasets -export NCDataset -export nckeys, ncdetails -######################################################################### -# NCDatasets → DimensionalArray convertions and loading -######################################################################### -""" - nckeys(file::String) -Return all keys of the `.nc` file in `file`. -""" -function nckeys(path::String) - NCDataset(path) do ds - return keys(ds) - end -end -nckeys(a::NCDataset) = keys(a) - -""" - ncdetails(file::String, io = stdout) -Print details about the `.nc` file in `file` on `io`. -""" -function ncdetails(file::String, io = stdout) - NCDataset(file) do ds - show(io, MIME"text/plain"(), ds) - end -end -ncdetails(ds::NCDataset, io = stdout) = show(io, MIME"text/plain"(), ds) - -""" - ClimArray(file::NCDataset, var::String, name = var) -> A -Load the variable `var` from the `file` and convert it -into a `ClimArray` which also contains the variable attributes as a dictionary. 
- -Notice that `file` should be an `NCDataset`, which allows you to lazily combine different -`.nc` data (typically split by time), e.g. -```julia -alldata = ["toa_fluxes_2020_\$(i).nc" for i in 1:12] -file = NCDataset(alldata; aggdim = "time") -A = ClimArray(file, "tow_sw_all") -``` -(of course you can just do `NCDataset("file.nc")` for single files). - -We do two performance improvements while loading the data: -1. If there are no missing values in the data (according to CF standards), the - returned array is automatically converted to a concrete type (i.e. `Union{Float32, Missing}` - becomes `Float32`). -2. Dimensions that are ranges (i.e. sampled with constant step size) are automatically - transformed to a standard Julia `Range` type (which makes sub-selecting faster). - - -At the moment, support for auto-loading equal area space types does not exist, -see [Types of spatial coordinates](@ref). But -you can easily transform them yourself into a `ClimArray` by doing e.g.: -```julia -file = NCDataset("some_file_with_eqarea.nc") -lons = file["lon"] -lats = file["lat"] -coords = [SVector(lo, la) for (lo, la) in zip(lons, lats)] -t = file["time"] -dimensions = (Coord(coords), Time(t)) -data = file["actual_data_like_radiation"] -A = ClimArray(data, dimensions) -``` -""" -function ClimArray(path::Union{String, Vector{String}}, args...; kwargs...) - NCDataset(path) do ds - data = ClimArray(ds, args...; kwargs...) - return data - end -end - -# TODO: Allow this function to take as input a tuple of indices, e.g. (:, :, 1:5) -# and only load this part, and correctly and instantly make it a ClimArray, which -# can solve "large memory" or "large data" problems. This funcionality -# must be sure to load the correct ranges of dimensions as well though! 
- -function ClimArray(ds::NCDatasets.AbstractDataset, var::String, name = var; eqarea = false) - svar = string(var) - cfvar = ds[svar] - attrib = Dict(cfvar.attrib) - A = cfvar |> Array - if eqarea - # TODO: This piece of code is specific to CDO output... - if haskey(ds, "ncells") # this is the equal area grid, so we make a Coord dimension - lon = ds["lon"] |> Array .|> wrap_lon - lat = ds["lat"] |> Array - time = ds["time"] |> Array - lonlat = [SVector(lon[i], lat[i]) for i in 1:length(lon)] - # here we sort lonlat and A in ascending latitude order, - # because the CDO output has reverse or even totally unsorted order - si = sortperm(lonlat, by = reverse) - data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); - attrib = attrib, name = svar) - elseif haskey(ds, "reduced_points") - lonlat = reduced_grid_to_points(ds["lat"], ds["reduced_points"]) - si = sortperm(lonlat, by = reverse) - time = ds["time"] |> Array - data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); - name = svar, attrib = attrib) - else - error("Don't know how to handle this equal area grid!") - end - else # standard variables - dnames = Tuple(NCDatasets.dimnames(cfvar)) - data = ClimArray(A, create_dims(ds, dnames); name = Symbol(name), attrib = attrib) - end - if !any(ismissing, data) - data = nomissing(data) - end - return data -end - -""" - create_dims(ds::NCDatasets.AbstractDataset, dnames) -Create a tuple of `Dimension`s from the `dnames` (tuple of strings). -""" -function create_dims(ds::NCDatasets.AbstractDataset, dnames) - # true_dims = getindex.(Ref(COMMONNAMES), dnames) - true_dims = to_proper_dimensions(dnames) - dim_values = Array.(getindex.(Ref(ds), dnames)) - optimal_values = vector2range.(dim_values) - return optimal_values .|> true_dims -end - -function to_proper_dimensions(dnames) - r = [] - for n in dnames - if haskey(COMMONNAMES, n) - push!(r, COMMONNAMES[n]) - else - @warn """ - Dimension name "$n" not in common names. 
Strongly recommended to ask for - adding this name to COMMONNAMES on github. Making generic dimension for now... - """ - push!(r, Dim{Symbol(n)}) - end - end - return (r...,) -end - -export Dim # for generic dimensions this must be exported - -######################################################################### -# Making vectors → ranges -######################################################################### -function vector2range(x::Vector{<:Real}) - dx = x[2]-x[1] - for i in 3:length(x) - x[i]-x[i-1] ≠ dx && return x # if no constant step, return array as is - end - r = x[1]:dx:x[end] - @assert r == x - return r -end - -function vector2range(t::Vector{<:DateTime}) - !sampled_less_than_date(t) && return vector2range(Date.(t)) - # TODO: implement hourly sampling here - @warn "Hourly sampling not yet implemented." - return t -end - -function vector2range(t::Vector{<:Date}) - tsamp = temporal_sampling(t) - period = tsamp2period(tsamp) - r = t[1]:period:t[end] - @assert r == t - return r -end - - -######################################################################### -# Equal area related -######################################################################### -function reduced_grid_to_points(lat, reduced_points) - lonlat = SVector{2, Float32}[] - for (i, θ) in enumerate(lat) - n = reduced_points[i] - dλ = Float32(360/n) - for j in 0:n-1 - push!(lonlat, SVector(0 + dλ*j, θ)) - end - end - return lonlat -end +#= +Code related with input output (IO) of .nc files directly to/from ClimArrays +An initial version of parts of this code was taken from: +https://github.com/rafaqz/GeoData.jl +=# +using NCDatasets +export NCDataset +export nckeys, ncdetails +export climarrays_to_nc + +dim_to_commonname(::Lat) = "lat" +dim_to_commonname(::Lon) = "lon" +dim_to_commonname(::Time) = "time" +dim_to_commonname(::Pre) = "level" +dim_to_commonname(D::Dim) = string(DimensionalData.name(D)) + +######################################################################### +# 
NCDatasets → DimensionalArray conversions and loading +######################################################################### +""" + nckeys(file::String) +Return all keys of the `.nc` file in `file`. +""" +function nckeys(path::String) + NCDataset(path) do ds + return keys(ds) + end +end +nckeys(a::NCDataset) = keys(a) + +""" + ncdetails(file::String, io = stdout) +Print details about the `.nc` file in `file` on `io`. +""" +function ncdetails(file::String, io = stdout) + NCDataset(file) do ds + show(io, MIME"text/plain"(), ds) + end +end +ncdetails(ds::NCDataset, io = stdout) = show(io, MIME"text/plain"(), ds) + +""" + ClimArray(file::Union{String,NCDataset}, var::String, name = var) -> A +Load the variable `var` from the `file` and convert it +into a `ClimArray` which also contains the variable attributes as a dictionary. +Dimension attributes are also given to the dimensions of `A`, if any exist. + +Notice that `file` can be an `NCDataset`, which allows you to lazily combine different +`.nc` data (typically split by time), e.g. +```julia +alldata = ["toa_fluxes_2020_\$(i).nc" for i in 1:12] +file = NCDataset(alldata; aggdim = "time") +A = ClimArray(file, "tow_sw_all") +``` +(but you can also directly give the string to a single file `"file.nc"` in `ClimArray` +if the data are contained in a single file). + +We do two performance improvements while loading the data: +1. If there are no missing values in the data (according to CF standards), the + returned array is automatically converted to a concrete type (i.e. `Union{Float32, Missing}` + becomes `Float32`). +2. Dimensions that are ranges (i.e. sampled with constant step size) are automatically + transformed to a standard Julia `Range` type (which makes sub-selecting faster). + + +At the moment, support for auto-loading equal area space types does not exist, +see [Types of spatial coordinates](@ref). 
+But you can transform them yourself into a `ClimArray` by doing e.g.: +```julia +file = NCDataset("some_file_with_eqarea.nc") +lons = file["lon"] +lats = file["lat"] +coords = [SVector(lo, la) for (lo, la) in zip(lons, lats)] +t = file["time"] +dimensions = (Coord(coords), Time(t)) +data = file["actual_data_like_radiation"] +A = ClimArray(data, dimensions) +``` +""" +function ClimArray(path::Union{String, Vector{String}}, args...; kwargs...) + NCDataset(path) do ds + data = ClimArray(ds, args...; kwargs...) + return data + end +end + +# TODO: Allow this function to take as input a tuple of indices, e.g. (:, :, 1:5) +# and only load this part, and correctly and instantly make it a ClimArray, which +# can solve "large memory" or "large data" problems. This functionality +# must be sure to load the correct ranges of dimensions as well though! + +function ClimArray(ds::NCDatasets.AbstractDataset, var::String, name = var; eqarea = false) + svar = string(var) + cfvar = ds[svar] + attrib = Dict(cfvar.attrib) + A = cfvar |> Array + if eqarea + # TODO: This piece of code is specific to CDO output... 
+ if haskey(ds, "ncells") # this is the equal area grid, so we make a Coord dimension + lon = ds["lon"] |> Array .|> wrap_lon + lat = ds["lat"] |> Array + time = ds["time"] |> Array + lonlat = [SVector(lon[i], lat[i]) for i in 1:length(lon)] + # here we sort lonlat and A in ascending latitude order, + # because the CDO output has reverse or even totally unsorted order + si = sortperm(lonlat, by = reverse) + data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); + attrib = attrib, name = svar) + elseif haskey(ds, "reduced_points") + # TODO: This can be easily upgraded to arbitrary dimensions via a simple + # dimension replacement / permutation at the end + lonlat = reduced_grid_to_points(ds["lat"], ds["reduced_points"]) + si = sortperm(lonlat, by = reverse) + time = ds["time"] |> Array + data = ClimArray(A[si, :], (Coord(lonlat[si]), Time(time)); + name = svar, attrib = attrib) + else + error("Don't know how to handle this equal area grid!") + end + else # standard variables + dnames = Tuple(NCDatasets.dimnames(cfvar)) + data = ClimArray(A, create_dims(ds, dnames); name = Symbol(name), attrib = attrib) + end + if !any(ismissing, data) + data = nomissing(data) + end + return data +end + +""" + create_dims(ds::NCDatasets.AbstractDataset, dnames) +Create a tuple of `Dimension`s from the `dnames` (tuple of strings). +""" +function create_dims(ds::NCDatasets.AbstractDataset, dnames) + # true_dims = getindex.(Ref(COMMONNAMES), dnames) + true_dims = to_proper_dimensions(dnames) + dim_values = Array.(getindex.(Ref(ds), dnames)) + optimal_values = vector2range.(dim_values) + attribs = [ + ds[d].attrib isa NCDatasets.BaseAttributes ? 
Dict(ds[d].attrib) : nothing + for d in dnames + ] + out = [] + for i in 1:length(true_dims) + push!(out, true_dims[i](optimal_values[i]; metadata = attribs[i])) + end + return (out...,) +end + +function to_proper_dimensions(dnames) + r = [] + for n in dnames + if haskey(COMMONNAMES, n) + push!(r, COMMONNAMES[n]) + else + @warn """ + Dimension name "$n" not in common names. Strongly recommended to ask for + adding this name to COMMONNAMES on github. Making generic dimension for now... + """ + push!(r, Dim{Symbol(n)}) + end + end + return (r...,) +end + +export Dim # for generic dimensions this must be exported + +######################################################################### +# Making vectors → ranges +######################################################################### +function vector2range(x::Vector{<:Real}) + dx = x[2]-x[1] + for i in 3:length(x) + x[i]-x[i-1] ≠ dx && return x # if no constant step, return array as is + end + r = x[1]:dx:x[end] + @assert r == x + return r +end + +function vector2range(t::Vector{<:DateTime}) + !sampled_less_than_date(t) && return vector2range(Date.(t)) + # TODO: implement hourly sampling here + @warn "Hourly sampling not yet implemented." 
+ return t +end + +function vector2range(t::Vector{<:Date}) + tsamp = temporal_sampling(t) + period = tsamp2period(tsamp) + r = t[1]:period:t[end] + @assert r == t + return r +end + + +######################################################################### +# Equal area related +######################################################################### +function reduced_grid_to_points(lat, reduced_points) + lonlat = SVector{2, Float32}[] + for (i, θ) in enumerate(lat) + n = reduced_points[i] + dλ = Float32(360/n) + for j in 0:n-1 + push!(lonlat, SVector(0 + dλ*j, θ)) + end + end + return lonlat +end + +######################################################################### +# Saving to .nc files +######################################################################### +const DEFAULT_ATTRIBS = Dict( + "time" => Dict( + "units" => "days since 0000-00-01 00:00:00", + "standard_name" => "time" + ), + "lon" => Dict( + "units" => "degrees_east", + "standard_name" => "longitude", + "valid_range" => Float32[-180.0, 360.0] + ), + "lat" => Dict( + "units" => "degrees_north", + "standard_name" => "latitude", + "valid_range" => Float32[-90.0, 90.0] + ), + "level" => Dict( + "units" => "millibars", + "long_name" => "pressure_level", + ), +) + +""" + climarrays_to_nc(file::String, Xs; globalattr = Dict()) +Write the given `ClimArray` instances (any iterable of `ClimArray`s or a single `ClimArray`) +to a `.nc` file following CF standard conventions using NCDatasets.jl. +Optionally specify global attributes for the `.nc` file. + +The metadata of the arrays in `Xs`, as well as their dimensions, are properly written +in the `.nc` file and any necessary type conversions happen automatically. + +**WARNING**: We assume that any dimensions shared between the `Xs` are identical. 
+""" +function climarrays_to_nc(file::String, X::ClimArray; globalattr = Dict()) + climarrays_to_nc(file, (X,); globalattr) +end +function climarrays_to_nc(file::String, Xs; globalattr = Dict()) + ds = NCDataset(file, "c"; attrib = globalattr) + # NCDataset("file.nc", "c"; attrib = globalattr) do ds + for (i, X) in enumerate(Xs) + n = string(X.name) + if n == "" + n = "x$i" + @warn "$i-th ClimArray has no name, naming it $(n) instead." + end + println("processing variable $(n)...") + println("writing dimensions...") + add_dims_to_ncfile!(ds, dims(X)) + println("writing the CF-variable...") + attrib = X.attrib + isnothing(attrib) && (attrib = Dict()) + dnames = dim_to_commonname.(dims(X)) + data = Array(X) + defVar(ds, n, data, (dnames...,); attrib) + end + close(ds) + # end +end + +function add_dims_to_ncfile!(ds::NCDatasets.AbstractDataset, dimensions::Tuple) + dnames = dim_to_commonname.(dimensions) + for (i, d) ∈ enumerate(dnames) + haskey(ds, d) && continue + v = dimensions[i].val + # this conversion to DateTime is necessary because CFTime.jl doesn't support Date + eltype(v) == Date && (v = DateTime.(v)) + l = length(v) + defDim(ds, d, l) # add dimension entry + attrib = dimensions[i].metadata + if isnothing(attrib) && haskey(DEFAULT_ATTRIBS, d) + @warn "Dimension $d has no attributes, adding default attributes (mandatory)." 
+ attrib = DEFAULT_ATTRIBS[d] + end + # write dimension values as a variable as well (mandatory) + defVar(ds, d, v, (d, ); attrib = attrib) + end +end diff --git a/src/physical_dimensions/spatial.jl b/src/physical_dimensions/spatial.jl index 803e1522..c260a2a7 100644 --- a/src/physical_dimensions/spatial.jl +++ b/src/physical_dimensions/spatial.jl @@ -7,7 +7,7 @@ export SVector # for equal area grid ######################################################################### # Spatial indexing ######################################################################### -export spatialidxs +export spatialidxs, lonlatfirst """ spatialidxs(A::ClimArray) → idxs @@ -31,6 +31,23 @@ function spatialidxs(::EqArea, A) return ((Coord(i),) for i in 1:size(A, Coord)) end +""" + lonlatfirst(A::ClimArray, args...) → B +Permute the dimensions of `A` to make a new array `B` that has first dimension longitude, +second dimension latitude, with the remaining dimensions of `A` following +(useful for most plotting functions). Optional extra dimensions +can be given as `args...`, specifying a specific order for the remaining dimensions. + +Example: +```julia +B = lonlatfirst(A) +C = lonlatfirst(A, Time) +``` +""" +function lonlatfirst(C, args...) + permutedims(C, (Lon, Lat, args..., otherdims(C, (Lon, Lat, args...))...)) +end + ######################################################################### # Periodicity of longitude ######################################################################### diff --git a/test/runtests.jl b/test/runtests.jl index 2b509a00..71581eed 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,9 +1,10 @@ using ClimateBase, Test, Dates using Statistics - Time = ClimateBase.Time +cd(@__DIR__) # TODO: Further test spatial averaging by making one hemisphere 1 and other 0 +# TODO: Test downloaded .nc file # Create the artificial dimensional array A that will be used in tests function monthly_insolation(t::TimeType, args...) 
@@ -31,8 +32,8 @@ for i in 1:length(lats) end end -A = ClimArray(A, d; name = "lon-variation") -B = ClimArray(B, d; name = "lon-constant") +A = ClimArray(A, d; name = "insolation") +B = ClimArray(B, d; attrib = Dict("a" => 2)) # %% @@ -183,3 +184,21 @@ end @test y[j] < y[j-1] end end + +@testset "NetCDF file IO" begin + globat = Dict("history" => "test") + climarrays_to_nc("test.nc", (A, B); globalattr = globat) + Aloaded = ClimArray("test.nc", "insolation") + Bloaded = ClimArray("test.nc", "x2") + + @test A.data == Aloaded.data + @test dims(Aloaded, Lon).metadata["units"] == "degrees_east" + @test B.data == Bloaded.data + @test string(Bloaded.name) == "x2" + @test dims(Bloaded, Time).metadata["standard_name"] == "time" + + ds = NCDataset("test.nc") + @test ds.attrib["history"] == "test" + close(ds) + rm("test.nc") +end