Skip to content

Commit

Permalink
Work-in-progress updating dimensions and coordinates
Browse files Browse the repository at this point in the history
Simplify and align with Common Data Model

Overall design:
- dimensions now just have a name and a size
- coordinates can optionally be attached to a dimension
  (currently these are separate Dicts etc)
- coordinates are specified by (coordinate) variable name
  which is a "standard" PALEO variable.
  Coordinate values are then looked up as needed

Changes:
- NamedDimension now just contains name, size.
  Remove FixedCoord (simpler version now in PALEOmodel)
- add functions
    get_dimensions
    get_dimension
    set_coordinates!
    get_coordinates
- simplify grids
  - remove coordinates, which can now be defined as "standard" PALEO variables
  - remove get_region (similar functionality now in PALEOmodel)
- Experimental implementation of Julia CommonDataModel interface
  in CommonDataModelExt extension.
  • Loading branch information
sjdaines committed Dec 29, 2024
1 parent 7248916 commit 5781501
Show file tree
Hide file tree
Showing 14 changed files with 752 additions and 604 deletions.
14 changes: 11 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PALEOboxes"
uuid = "804b410e-d900-4b2a-9ecd-f5a06d4c1fd4"
authors = ["Stuart Daines <stuart.daines@gmail.com>"]
version = "0.21.40"
version = "0.22.0"

[deps]
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
Expand Down Expand Up @@ -30,9 +30,16 @@ TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"

[weakdeps]
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"

[extensions]
CommonDataModelExt = "CommonDataModel"

[compat]
Atomix = "0.1, 1.0"
BenchmarkTools = "1.0"
CommonDataModel = "0.3.7"
DataFrames = "1.1"
DocStringExtensions = "0.8, 0.9"
Documenter = "1"
Expand All @@ -52,12 +59,13 @@ StructArrays = "0.6, 0.7"
TestEnv = "1.0"
TimerOutputs = "0.5"
YAML = "0.4.7"
julia = "1.6"
julia = "1.10"

[extras]
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Documenter", "Logging", "Test"]
test = ["CommonDataModel", "Documenter", "Logging", "Test"]
1 change: 0 additions & 1 deletion docs/src/DomainsVariablesFields.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ Examples:
```@docs
Field
get_field
wrap_field
```

## Spaces
Expand Down
93 changes: 93 additions & 0 deletions ext/CommonDataModelExt/CommonDataModelExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
module CommonDataModelExt

import PALEOboxes as PB
import CommonDataModel as CDM

#####################################
# Wrapper types
######################################

"""
    ModelCDM

Wrapper presenting a `PB.Model` as a `CDM.AbstractDataset`.

`modeldata` is `nothing` when the wrapper was created from a `PB.Model` alone, or the
`PB.AbstractModelData` it was created from.
"""
struct ModelCDM <: CDM.AbstractDataset
    model::PB.Model
    modeldata::Union{Nothing, PB.AbstractModelData}
end

# Wrap a Model without any associated model data
PB.CDModel(model::PB.Model) = ModelCDM(model, nothing)
# Wrap the Model held by `modeldata`, keeping `modeldata` so that variable data can be looked up
PB.CDModel(modeldata::PB.AbstractModelData) = ModelCDM(modeldata.model, modeldata)

"""
    DomainCDM

Wrapper presenting a `PB.Domain` as a `CDM.AbstractDataset`.

`modeldata` is `nothing` if no model data is available.
"""
struct DomainCDM <: CDM.AbstractDataset
    domain::PB.Domain
    modeldata::Union{Nothing, PB.AbstractModelData}
end

# Wrap a Domain without any associated model data
PB.CDModel(domain::PB.Domain) = DomainCDM(domain, nothing)


"""
    VariableDomainCDM

Wrapper for a `PB.VariableDomain`.

`data` holds whatever `PB.get_data(variabledomain, modeldata)` returned when the wrapper
was created by `CDM.variable`, or `nothing` if no model data was available.

NOTE(review): this is declared as a subtype of `CDM.AbstractDataset`; a variable wrapper
would presumably be a `CDM.AbstractVariable` - TODO confirm intent.
"""
struct VariableDomainCDM <: CDM.AbstractDataset
    variabledomain::PB.VariableDomain
    modeldata::Union{Nothing, PB.AbstractModelData}
    data
end

# Wrap a VariableDomain without any associated model data or data array
PB.CDModel(variabledomain::PB.VariableDomain) =
    VariableDomainCDM(variabledomain, nothing, nothing)

######################################
# Model
######################################

# iterable with all group names (one group per Domain in the model)
CDM.groupnames(m::ModelCDM) = [d.name for d in m.model.domains]

# Look up Domain `name` (with `allow_not_found=false`) and wrap it, passing on `modeldata`
function CDM.group(m::ModelCDM, name::AbstractString)
    domain = PB.get_domain(m.model, name; allow_not_found=false)
    return DomainCDM(domain, m.modeldata)
end

###############################################
# Domain
################################################

CDM.name(d::DomainCDM) = d.domain.name

# TODO
# parentdataset(d::DomainCDM)

# returns a list of variable names as strings
Base.keys(d::DomainCDM) = [v.name for v in PB.get_variables(d.domain)]

# Look up variable `varname` (with `allow_not_found=false`) and wrap it.
# The data array is only looked up if model data is available.
function CDM.variable(d::DomainCDM, varname::AbstractString)
    variabledomain = PB.get_variable(d.domain, varname; allow_not_found=false)
    if isnothing(d.modeldata)
        data = nothing
    else
        data = PB.get_data(variabledomain, d.modeldata)
    end
    return VariableDomainCDM(variabledomain, d.modeldata, data)
end

# names of all dimensions of the Domain
CDM.dimnames(d::DomainCDM) = [nd.name for nd in PB.get_dimensions(d.domain)]

# size of Domain dimension `name`
CDM.dim(d::DomainCDM, name::AbstractString) = PB.get_dimension(d.domain, name).size


###############################################
# VariableDomain
################################################

CDM.name(v::VariableDomainCDM) = v.variabledomain.name

# the wrapped Domain that this variable belongs to
CDM.dataset(v::VariableDomainCDM) = DomainCDM(v.variabledomain.domain, v.modeldata)

# names of all dimensions of the variable
CDM.dimnames(v::VariableDomainCDM) =
    [nd.name for nd in PB.get_dimensions(v.variabledomain)]

Base.ndims(v::VariableDomainCDM) = length(CDM.dimnames(v))

# `Base.size` must return a Tuple (not a lazy generator), so that eg `size(v)[i]`
# and comparisons against tuples work.
Base.size(v::VariableDomainCDM) =
    Tuple(nd.size for nd in PB.get_dimensions(v.variabledomain))

CDM.attribnames(v::VariableDomainCDM) = keys(v.variabledomain.attributes)

CDM.attrib(v::VariableDomainCDM, name::Symbol) = v.variabledomain.attributes[name]

# Indexing and eltype are forwarded to the wrapped data
# (`v.data` is `nothing` if the wrapper was created without model data)
Base.getindex(v::VariableDomainCDM, indices...) = Base.getindex(v.data, indices...)

Base.eltype(v::VariableDomainCDM) = Base.eltype(v.data)


end # module
203 changes: 30 additions & 173 deletions src/CoordsDims.jl
Original file line number Diff line number Diff line change
@@ -1,199 +1,56 @@

################################
# Coordinates
#################################

"""
FixedCoord

A fixed (state independent) coordinate
"""
mutable struct FixedCoord
name::String
values::Vector{Float64}
attributes::Dict{Symbol, Any}
end
#################################################
# Dimensions
#####################################################

"""
append_units(name::AbstractString, attributes) -> "name (units)"
NamedDimension(name, size)
Utility function to append variable units string to a variable name for display.
A named dimension
"""
function append_units(name::AbstractString, attributes::Dict{Symbol, Any})
units = get(attributes, :units, "")
if isempty(units)
return name
else
return name*" ($units)"
end
struct NamedDimension
name::String
size::Int64
end

append_units(name::AbstractString, attributes::Nothing) = name
function Base.show(io::IO, nd::NamedDimension)
print(io, "NamedDimension(name=", nd.name, ", size=", nd.size, ")")
return nothing
end

"""
build_coords_edges(coords_vec::Vector{FixedCoord}) -> Vector{Float64}
Build a vector of coordinate edges (length `n+1``) from `coords_vec`, assuming the PALEO
convention that `coords_vec` contains three elements with
cell midpoints, lower edges, upper edges each of length `n`, in that order.
function get_dimensions(obj) -> Vector{NamedDimension}
Falls back to just returning the first entry in `coords_vec` for other cases.
Get all dimensions for PALEO object `obj`
"""
function build_coords_edges(coords_vec::Vector{FixedCoord})

if length(coords_vec) == 1 || length(coords_vec) > 3
# 1 coordinate or something we don't understand - take first
co = first(coords_vec)
co_values = co.values
co_label = append_units(co.name, co.attributes)
elseif length(coords_vec) in (2, 3)
# 2 coordinates assume lower, upper edges
# 3 coordinates assume mid, lower, upper
co_lower = coords_vec[end-1]
co_upper = coords_vec[end]
co_label = append_units(co_lower.name*", "*co_upper.name, co_lower.attributes)
first(co_lower.values) < first(co_upper.values) ||
@warn "build_coords_edges: $co_label co_lower is > co_upper - check model grid"
if co_lower.values[end] > co_lower.values[1] # ascending order
co_lower.values[2:end] == co_upper.values[1:end-1] ||
@warn "build_coords_edges: $co_label lower and upper edges don't match"
co_values = [co_lower.values; co_upper.values[end]]
else # descending order
co_lower.values[1:end-1] == co_upper.values[2:end] ||
@warn "build_coords_edges: $co_label lower and upper edges don't match"
co_values = [co_upper.values[1]; co_lower.values]
end

end

return co_values, co_label
end
function get_dimensions end

"guess coordinate edges from midpoints, assuming uniform spacing"
function guess_coords_edges(x_midpoints)
first_x = x_midpoints[1] - 0.5*(x_midpoints[2] - x_midpoints[1])
last_x = x_midpoints[end] + 0.5*(x_midpoints[end] - x_midpoints[end-1])
return [first_x; 0.5.*(x_midpoints[1:end-1] .+ x_midpoints[2:end]); last_x]
end


function get_region(fc::FixedCoord, indices::AbstractVector)
return FixedCoord(fc.name, fc.values[indices], fc.attributes)
end

function get_region(fcv::Vector{FixedCoord}, indices::AbstractVector)
return [FixedCoord(fc.name, fc.values[indices], fc.attributes) for fc in fcv]
end


"find indices of coord from first before range[1] to first after range[2]"
function find_indices(coord::AbstractVector, range)
length(range) == 2 ||
throw(ArgumentError("find_indices: length(range) != 2 $range"))

idxstart = findlast(t -> t<=range[1], coord)
isnothing(idxstart) && (idxstart = 1)

idxend = findfirst(t -> t>=range[2], coord)
isnothing(idxend) && (idxend = length(coord))

return idxstart:idxend, (coord[idxstart], coord[idxend])
end

"find indices of coord nearest val"
function find_indices(coord::AbstractVector, val::Real)
idx = 1
for i in 1:length(coord)
if abs(coord[i] - val) < abs(coord[idx] - val)
idx = i
end
end

return [idx], coord[idx]
end
"""
function get_dimension(obj, dimname) -> NamedDimension
#################################################
# Dimensions
#####################################################
Get dimension `dimname` for PALEO object `obj`
"""
function get_dimension end

"""
NamedDimension
function set_coordinates!(obj, dimname, coordinates::Vector{String})
A named dimension, with optional attached fixed coordinates `coords`
Set coordinates attached to `dimname` for PALEO object `obj`
PALEO convention is that where possible `coords` contains three elements, for cell
PALEO convention is that where possible `coordinates` contains three elements, for cell
midpoints, lower edges, upper edges, in that order.
"""
mutable struct NamedDimension
name::String
size::Int64
coords::Vector{FixedCoord} # may be empty
end

"create from size only (no coords)"
function NamedDimension(name, size::Integer)
return NamedDimension(
name,
size,
FixedCoord[],
)
end

"create from coord mid-points"
function NamedDimension(name, coord::AbstractVector)
return NamedDimension(
name,
length(coord),
[
FixedCoord(name, coord, Dict{Symbol, Any}()),
]
)
end

"create from coord mid-points and edges"
function NamedDimension(name, coord::AbstractVector, coord_edges::AbstractVector)
if coord[end] > coord[1]
# ascending order
coord_lower = coord_edges[1:end-1]
coord_upper = coord_edges[2:end]
else
# descending order
coord_lower = coord_edges[2:end]
coord_upper = coord_edges[1:end-1]
end
return NamedDimension(
name,
length(coord),
[
FixedCoord(name, coord, Dict{Symbol, Any}()),
FixedCoord(name*"_lower", coord_lower, Dict{Symbol, Any}()),
FixedCoord(name*"_upper", coord_upper, Dict{Symbol, Any}()),
]
)
end

function get_region(nd::NamedDimension, indices::AbstractVector)
return NamedDimension(nd.name, length(indices), get_region(nd.coords, indices))
end
function set_coordinates! end

"""
build_coords_edges(nd::NamedDimension) -> Vector{Float64}
function get_coordinates(obj, dimname) -> coordinates::Vector{String}
Call [`build_coords_edges`](@ref)(nd.coords), or fallback to just returning indices
if no coords present.
"""
function build_coords_edges(nd::NamedDimension)
if !isempty(nd.coords)
return build_coords_edges(nd.coords)
else
@warn "no coords for NamedDimension $(nd.name), returning indices"
return collect(1:nd.size), nd.name*" (indices)"
end
end
Get coordinates (if any) attached to `dimname` for PALEO object `obj`
function Base.show(io::IO, nd::NamedDimension)
print(io, "NamedDimension(name=", nd.name, ", size=", nd.size, ", coords=(")
join(io, [c.name for c in nd.coords], ", ")
print(io, "))")
return nothing
end
PALEO convention is that where possible `coordinates` contains three elements, for cell
midpoints, lower edges, upper edges, in that order.
"""
function get_coordinates end
Loading

0 comments on commit 5781501

Please sign in to comment.