-
Notifications
You must be signed in to change notification settings - Fork 142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add copyto! and similar for Datasets #937
Changes from 1 commit
7d12db2
bc8fe33
c2040e2
4fda065
2f542c9
1c059b8
f873639
57b2fba
fee976a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | |||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -934,6 +934,8 @@ end | ||||||||||||||||
# This infers the Julia type from the HDF5.Datatype. Specific file formats should provide their own read(dset). | |||||||||||||||||
const DatasetOrAttribute = Union{Dataset,Attribute} | |||||||||||||||||
|
|||||||||||||||||
include("views.jl") | |||||||||||||||||
|
|||||||||||||||||
function Base.read(obj::DatasetOrAttribute) | |||||||||||||||||
dtype = datatype(obj) | |||||||||||||||||
T = get_jl_type(dtype) | |||||||||||||||||
|
@@ -957,6 +959,19 @@ function Base.read(obj::DatasetOrAttribute, ::Type{T}, I...) where T | ||||||||||||||||
return val | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
""" | |||||||||||||||||
read!(obj::DatasetOrAttribute, output_buffer::AbstractArray{T} [, I...]) where T | |||||||||||||||||
|
|||||||||||||||||
Read [part of] a dataset or attribute into a preallocated output buffer. | |||||||||||||||||
The output buffer must be convertible to a pointer and have a contiguous layout. | |||||||||||||||||
""" | |||||||||||||||||
function Base.read!(obj::DatasetOrAttribute, buf::AbstractArray{T}, I...) where T | |||||||||||||||||
dtype = datatype(obj) | |||||||||||||||||
val = generic_read!(buf, obj, dtype, T, I...) | |||||||||||||||||
close(dtype) | |||||||||||||||||
return val | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
# `Type{String}` does not have a definite size, so the generic_read does not accept | |||||||||||||||||
# it even though it will return a `String`. This explicit overload allows that usage. | |||||||||||||||||
function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...) | |||||||||||||||||
|
@@ -968,10 +983,26 @@ function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...) | ||||||||||||||||
return val | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
""" | |||||||||||||||||
copyto!(output_buffer::AbstractArray{T}, obj::Union{DatasetOrAttribute}) where T | |||||||||||||||||
copyto!(output_buffer::AbstractArray{T}, @view(obj::Union{DatasetOrAttribute}, I...)) | |||||||||||||||||
|
|||||||||||||||||
Copy [part of] a HDF5 dataset or attribute to a preallocated output buffer. | |||||||||||||||||
The output buffer must be convertible to a pointer and have a contiguous layout. | |||||||||||||||||
""" | |||||||||||||||||
function Base.copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute, I...) where T | |||||||||||||||||
return Base.read!(obj, output_buffer, I...) | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
# Special handling for reading OPAQUE datasets and attributes | |||||||||||||||||
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}) | |||||||||||||||||
function generic_read!(buf::Matrix{UInt8}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}) | |||||||||||||||||
generic_read(obj, filetype, Opaque, buf) | |||||||||||||||||
end | |||||||||||||||||
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}, buf::Union{Matrix{UInt8}, Nothing} = nothing) | |||||||||||||||||
sz = size(obj) | |||||||||||||||||
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz)) | |||||||||||||||||
if isnothing(buf) | |||||||||||||||||
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz)) | |||||||||||||||||
end | |||||||||||||||||
if obj isa Dataset | |||||||||||||||||
read_dataset(obj, filetype, buf, obj.xfer) | |||||||||||||||||
else | |||||||||||||||||
|
@@ -989,7 +1020,14 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque | ||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
# generic read function | |||||||||||||||||
function generic_read!(buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T | |||||||||||||||||
musm marked this conversation as resolved.
Show resolved
Hide resolved
|
|||||||||||||||||
return _generic_read(obj, filetype, T, buf, I...) | |||||||||||||||||
end | |||||||||||||||||
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T | |||||||||||||||||
return _generic_read(obj, filetype, T, nothing, I...) | |||||||||||||||||
end | |||||||||||||||||
function _generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, | |||||||||||||||||
buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}, Nothing}, I...) where T | |||||||||||||||||
!isconcretetype(T) && error("type $T is not concrete") | |||||||||||||||||
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections") | |||||||||||||||||
|
|||||||||||||||||
|
@@ -1026,13 +1064,18 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I. | ||||||||||||||||
end | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
if do_normalize(T) | |||||||||||||||||
# The entire dataset is read into in a buffer matrix where the first dimension at | |||||||||||||||||
# any stage of normalization is the bytes for a single element of type `T`, and | |||||||||||||||||
# the second dimension of the matrix runs through all elements. | |||||||||||||||||
buf = Matrix{UInt8}(undef, sizeof(T), prod(sz)) | |||||||||||||||||
if isnothing(buf) | |||||||||||||||||
if do_normalize(T) | |||||||||||||||||
# The entire dataset is read into in a buffer matrix where the first dimension at | |||||||||||||||||
# any stage of normalization is the bytes for a single element of type `T`, and | |||||||||||||||||
# the second dimension of the matrix runs through all elements. | |||||||||||||||||
buf = Matrix{UInt8}(undef, sizeof(T), prod(sz)) | |||||||||||||||||
else | |||||||||||||||||
buf = Array{T}(undef, sz...) | |||||||||||||||||
end | |||||||||||||||||
else | |||||||||||||||||
buf = Array{T}(undef, sz...) | |||||||||||||||||
sizeof(buf) != prod(sz)*sizeof(T) && | |||||||||||||||||
error("Provided array buffer of size, $(size(buf)), and element type, $(eltype(buf)), does not match the dataset of size, $sz, and type, $T") | |||||||||||||||||
end | |||||||||||||||||
memspace = isempty(I) ? dspace : dataspace(sz) | |||||||||||||||||
|
|||||||||||||||||
|
@@ -1062,6 +1105,89 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I. | ||||||||||||||||
end | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
|
|||||||||||||||||
""" | |||||||||||||||||
similar(obj::DatasetOrAttribute, [::Type{T}], [I::Integer...]) | |||||||||||||||||
|
|||||||||||||||||
Return a `Array{T}` or `Matrix{UInt8}` to that can contain [part of] the dataset. | |||||||||||||||||
""" | |||||||||||||||||
function Base.similar(obj::DatasetOrAttribute, I::Integer...) | |||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question, why do we need to pass the indices here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was more specifically looking at the AbstractArray interface https://docs.julialang.org/en/v1/manual/interfaces/#man-interface-array, where there are 4 optional
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When we do implement AbstractArray in 0.17, then we do not need this signature. We could just implement the version with Dims. However, since we are not implementing AbstractArray in 0.16, we probably do need to retain a method with the above signature since we do not have a I will rename this to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Personally I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @simonbyrne did you intend to comment on the other thread? Is there a specific part of #937 (comment) you disagree with? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh yes, sorry... |
|||||||||||||||||
dtype = datatype(obj) | |||||||||||||||||
T = get_jl_type(dtype) | |||||||||||||||||
val = similar(obj, dtype, T, I...) | |||||||||||||||||
close(dtype) | |||||||||||||||||
return val | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
function Base.similar(obj::DatasetOrAttribute, ::Type{T}, I::Integer...) where T | |||||||||||||||||
dtype = datatype(obj) | |||||||||||||||||
val = similar(obj, dtype, T, I...) | |||||||||||||||||
close(dtype) | |||||||||||||||||
return val | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}) | |||||||||||||||||
sz = size(obj) | |||||||||||||||||
return Matrix{UInt8}(undef, sizeof(filetype), prod(sz)) | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
# Duplicated from generic_read. TODO: Deduplicate | |||||||||||||||||
function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I::Integer...) where T | |||||||||||||||||
!isconcretetype(T) && error("type $T is not concrete") | |||||||||||||||||
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections") | |||||||||||||||||
|
|||||||||||||||||
I = Base.OneTo.(I) | |||||||||||||||||
|
|||||||||||||||||
memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory | |||||||||||||||||
|
|||||||||||||||||
if sizeof(T) != sizeof(memtype) | |||||||||||||||||
error(""" | |||||||||||||||||
Type size mismatch | |||||||||||||||||
sizeof($T) = $(sizeof(T)) | |||||||||||||||||
sizeof($memtype) = $(sizeof(memtype)) | |||||||||||||||||
""") | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
dspace = dataspace(obj) | |||||||||||||||||
stype = API.h5s_get_simple_extent_type(dspace) | |||||||||||||||||
stype == API.H5S_NULL && return EmptyArray{T}() | |||||||||||||||||
|
|||||||||||||||||
if !isempty(I) | |||||||||||||||||
indices = Base.to_indices(obj, I) | |||||||||||||||||
dspace = hyperslab(dspace, indices...) | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
scalar = false | |||||||||||||||||
if stype == API.H5S_SCALAR | |||||||||||||||||
sz = (1,) | |||||||||||||||||
scalar = true | |||||||||||||||||
elseif isempty(I) | |||||||||||||||||
sz = size(dspace) | |||||||||||||||||
else | |||||||||||||||||
sz = map(length, filter(i -> !isa(i, Int), indices)) | |||||||||||||||||
if isempty(sz) | |||||||||||||||||
sz = (1,) | |||||||||||||||||
scalar = true | |||||||||||||||||
end | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
if do_normalize(T) | |||||||||||||||||
# The entire dataset is read into in a buffer matrix where the first dimension at | |||||||||||||||||
# any stage of normalization is the bytes for a single element of type `T`, and | |||||||||||||||||
# the second dimension of the matrix runs through all elements. | |||||||||||||||||
buf = Matrix{UInt8}(undef, sizeof(T), prod(sz)) | |||||||||||||||||
buf = reshape(normalize_types(T, buf), sz...) | |||||||||||||||||
else | |||||||||||||||||
buf = Array{T}(undef, sz...) | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
|
|||||||||||||||||
close(memtype) | |||||||||||||||||
close(dspace) | |||||||||||||||||
|
|||||||||||||||||
return buf | |||||||||||||||||
end | |||||||||||||||||
|
|||||||||||||||||
# Array constructor for datasets | |||||||||||||||||
Array(x::Dataset) = read(x) | |||||||||||||||||
|
|||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Permit Base.view for Dataset and Attribute to normalize Base.copyto! syntax | ||
|
||
# Long term consideration: | ||
# If Dataset and Attribute become an AbstractArray, perhaps these should just be SubArrays ? | ||
""" | ||
DatasetView | ||
|
||
This is an experimental data type and may change. Use `view` rather than `DatasetView` directly. | ||
""" | ||
struct DatasetView | ||
parent::Dataset | ||
indices | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to store the dataspace? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is worthwhile eventually, but I think we should defer this to a larger overhaul of In particular, we should consider parameterizing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #930 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm leaning towards removing the view feature from this pull request. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's a good idea, but I would suggest making it actually a functional part: i.e. calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. I just think the |
||
end | ||
|
||
function Base.view(obj::Dataset, I...) | ||
return DatasetView(obj, I) | ||
end | ||
|
||
Base.similar(view::DatasetView) = similar(view.parent, length.(view.indices)...) | ||
|
||
""" | ||
AttributeView | ||
|
||
This is an experimental data type and may change. Use `view` rather than `AttributeView` directly. | ||
""" | ||
struct AttributeView | ||
parent::Attribute | ||
indices | ||
end | ||
|
||
function Base.view(obj::Attribute, I...) | ||
return AttributeView(obj, I) | ||
end | ||
|
||
Base.similar(view::AttributeView) = similar(view.parent, length.(view.indices)...) | ||
|
||
const DatasetOrAttributeView = Union{DatasetView, AttributeView} | ||
|
||
function Base.copyto!(output_buffer::AbstractArray{T}, view::DatasetOrAttributeView) where T | ||
return Base.read!(view.parent, output_buffer, view.indices...) | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
using HDF5 | ||
using Test | ||
|
||
@testset "views and non-allocating methods" begin | ||
fn = tempname() | ||
|
||
@info "view.jl" fn | ||
data = rand(UInt16, 16, 16) | ||
|
||
h5open(fn, "w") do h5f | ||
h5f["data"] = data | ||
end | ||
|
||
h5open(fn, "r") do h5f | ||
buffer = similar(h5f["data"]) | ||
copyto!(buffer, h5f["data"]) | ||
@test isequal(buffer, data) | ||
|
||
read!(h5f["data"], buffer) | ||
@test isequal(buffer, data) | ||
|
||
v = @view(h5f["data"][1:4, 1:4]) | ||
|
||
buffer = similar(v) | ||
@test size(buffer) == (4,4) | ||
copyto!(buffer, v) | ||
@test isequal(buffer, @view(data[1:4, 1:4])) | ||
|
||
buffer .= 1 | ||
read!(h5f["data"], buffer, 1:4, 1:4) | ||
@test isequal(buffer, @view(data[1:4, 1:4])) | ||
|
||
@test size(similar(h5f["data"], Int16)) == size(h5f["data"]) | ||
@test size(similar(h5f["data"], 5,6)) == (5, 6) | ||
@test size(similar(h5f["data"], Int16, 8,7)) == (8,7) | ||
end | ||
|
||
rm(fn) | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious why do we need this method? Seems like
copyto!
alone would be sufficient as per the AbstractArray interface.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We currently implement
Base.read
, so why notBase.read!
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should define this method, because we are moving more and more having the
HDF5.Dataset
type satisfying theAbstractArray
interface. Users would expectcopyto!
as that is the standard name for this function, thus additionally addingBase.read!
seems unnecessary.I understand that we are defining methods on
Base.read
throughout theHDF5
module, but I don't see that we should extend that and addBase.read!
The definition in IO/Networking, also suggests that this wouldn't be the best overload, since it's used by default for reading from a filename or stream.
https://docs.julialang.org/en/v1/base/io-network/#Base.read!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Base.read!
has been removed.