From 50fa3f6a25de5755aa576f2a05a55cdf520c0dfa Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 1 Sep 2023 15:07:58 -0500 Subject: [PATCH] feat: `to_buffers` (#28) * foot in the door * Got started on to_buffers: done with PrimitiveArray. * Defined a bunch of tests. * to_buffers: EmptyArray and ListOffsetArray. * to_buffers: strings and ListArray. * to_buffers: RegularArray, RecordArray, TupleArray. * to_buffers: IndexedArray, IndexedOptionArray, ByteMaskedArray, BitMaskedArray, and UnmaskedArray. * to_buffers: UnionArray, and we're done. --- src/AwkwardArray.jl | 332 +++++++++++++++++++++++++++++++++++++++++++- test/runtests.jl | 305 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 635 insertions(+), 2 deletions(-) diff --git a/src/AwkwardArray.jl b/src/AwkwardArray.jl index 806cd32..77936ff 100644 --- a/src/AwkwardArray.jl +++ b/src/AwkwardArray.jl @@ -236,6 +236,63 @@ function push_dummy!(layout::PrimitiveArray{ITEM}) where {ITEM} push!(layout, zero(ITEM)) end +function _to_buffers!( + layout::PrimitiveArray{ITEM,BUFFER}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {ITEM,BUFFER<:AbstractVector{ITEM}} + form_key = "node$(number[begin])" + number[begin] += 1 + + if ITEM == Bool + primitive = "bool" + elseif ITEM == Int8 + primitive = "int8" + elseif ITEM == UInt8 + primitive = "uint8" + elseif ITEM == Int16 + primitive = "int16" + elseif ITEM == UInt16 + primitive = "uint16" + elseif ITEM == Int32 + primitive = "int32" + elseif ITEM == UInt32 + primitive = "uint32" + elseif ITEM == Int64 + primitive = "int64" + elseif ITEM == UInt64 + primitive = "uint64" + elseif ITEM == Float16 + primitive = "float16" + elseif ITEM == Float32 + primitive = "float32" + elseif ITEM == Float64 + primitive = "float64" + elseif ITEM == Complex{Float32} + primitive = "complex64" + elseif ITEM == Complex{Float64} + primitive = "complex128" + # elseif ITEM <: Dates.DateTime # FIXME + # primitive = "datetime64" + # elseif ITEM <: Dates.TimePeriod # FIXME + # primitive = "timedelta64" + else + error( + "PrimitiveArray has an ITEM type that can't be serialized in the to_buffers protocol: $ITEM", + ) + end + + containers["$form_key-data"] = reinterpret(UInt8, layout.data) + + Dict{String,Any}( + "class" => "NumpyArray", + "primitive" => primitive, + "inner_shape" => Vector{Int64}(), + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### EmptyArray ########################################################### struct EmptyArray{BEHAVIOR} <: LeafType{BEHAVIOR} @@ -267,6 +324,16 @@ function Base.push!(layout::EmptyArray, input) error("attempting to fill $(typeof(layout)) with data") end +function _to_buffers!( + layout::EmptyArray, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) + number[begin] += 1 + + Dict{String,Any}("class" => "EmptyArray") +end + ### ListOffsetArray ###################################################### abstract type ListType{BEHAVIOR} <: Content{BEHAVIOR} end @@ -357,6 +424,25 @@ function push_dummy!(layout::ListOffsetArray) end_list!(layout) end +function _to_buffers!( + layout::ListOffsetArray{INDEX,CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {INDEX<:IndexBig,CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-offsets"] = reinterpret(UInt8, layout.offsets) + + Dict{String,Any}( + "class" => "ListOffsetArray", + "offsets" => _to_buffers_index(INDEX), + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### ListArray ############################################################ struct ListArray{INDEX<:IndexBig,CONTENT<:Content,BEHAVIOR} <: ListType{BEHAVIOR} @@ -463,6 +549,27 @@ function push_dummy!(layout::ListArray) end_list!(layout) end +function _to_buffers!( + layout::ListArray{INDEX,CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {INDEX<:IndexBig,CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-starts"] = reinterpret(UInt8, layout.starts) + containers["$form_key-stops"] = reinterpret(UInt8, layout.stops) + + Dict{String,Any}( + "class" => "ListArray", + "starts" => _to_buffers_index(INDEX), + "stops" => _to_buffers_index(INDEX), + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### RegularArray ######################################################### mutable struct RegularArray{CONTENT<:Content,BEHAVIOR} <: ListType{BEHAVIOR} @@ -595,6 +702,22 @@ function push_dummy!(layout::RegularArray) end_list!(layout) end +function _to_buffers!( + layout::RegularArray{CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + Dict{String,Any}( + "class" => "RegularArray", + "size" => layout.size, + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + ) +end + ### ListType with behavior = :string ##################################### StringOffsetArray( @@ -1070,6 +1193,21 @@ function push_dummy!(layout::RecordArray) end_record!(layout) end +function _to_buffers!( + layout::RecordArray{FIELDS,CONTENTS}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {FIELDS,CONTENTS<:Base.Tuple{Vararg{Content}}} + number[begin] += 1 + + Dict{String,Any}( + "class" => "RecordArray", + "fields" => [String(x) for x in FIELDS], + "contents" => [_to_buffers!(x, number, containers) for x in layout.contents], + "parameters" => _to_buffers_parameters(layout), + ) +end + ### TupleArray ########################################################### mutable struct TupleArray{CONTENTS<:Base.Tuple{Vararg{Content}},BEHAVIOR} <: @@ -1243,6 +1381,21 @@ function push_dummy!(layout::TupleArray) end_tuple!(layout) end +function _to_buffers!( + layout::TupleArray{CONTENTS}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {FIELDS,CONTENTS<:Base.Tuple{Vararg{Content}}} + number[begin] += 1 + + Dict{String,Any}( + "class" => "RecordArray", + "fields" => nothing, + "contents" => [_to_buffers!(x, number, containers) for x in layout.contents], + "parameters" => _to_buffers_parameters(layout), + ) +end + ### IndexedArray ######################################################### struct IndexedArray{INDEX<:IndexBig,CONTENT<:Content,BEHAVIOR} <: Content{BEHAVIOR} @@ -1342,6 +1495,25 @@ function push_dummy!(layout::IndexedArray) layout end +function _to_buffers!( + layout::IndexedArray{INDEX,CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {INDEX<:IndexBig,CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-index"] = reinterpret(UInt8, layout.index) + + Dict{String,Any}( + "class" => "IndexedArray", + "index" => _to_buffers_index(INDEX), + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### IndexedOptionArray ################################################### abstract type OptionType{BEHAVIOR} <: Content{BEHAVIOR} end @@ -1473,6 +1645,25 @@ function push_dummy!(layout::IndexedOptionArray) push_null!(layout) end +function _to_buffers!( + layout::IndexedOptionArray{INDEX,CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {INDEX<:IndexBigSigned,CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-index"] = reinterpret(UInt8, layout.index) + + Dict{String,Any}( + "class" => "IndexedOptionArray", + "index" => _to_buffers_index(INDEX), + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### ByteMaskedArray ###################################################### struct ByteMaskedArray{INDEX<:IndexBool,CONTENT<:Content,BEHAVIOR} <: OptionType{BEHAVIOR} @@ -1604,6 +1795,26 @@ function push_dummy!(layout::ByteMaskedArray) push_null!(layout) end +function _to_buffers!( + layout::ByteMaskedArray{INDEX,CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {INDEX<:IndexBool,CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-mask"] = reinterpret(UInt8, layout.mask) + + Dict{String,Any}( + "class" => "ByteMaskedArray", + "mask" => _to_buffers_index(INDEX), + "content" => _to_buffers!(layout.content, number, containers), + "valid_when" => layout.valid_when, + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### BitMaskedArray ####################################################### # # Note: all Python BitMaskedArrays must be converted to lsb_order = true. @@ -1736,6 +1947,28 @@ function push_dummy!(layout::BitMaskedArray) push_null!(layout) end +function _to_buffers!( + layout::BitMaskedArray{CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {CONTENT<:Content} + form_key = "node$(number[begin])" + number[begin] += 1 + + cut = 1:Int64(ceil(length(layout.mask) / 8.0)) + containers["$form_key-mask"] = reinterpret(UInt8, layout.mask.chunks)[cut] + + Dict{String,Any}( + "class" => "BitMaskedArray", + "mask" => "u8", + "content" => _to_buffers!(layout.content, number, containers), + "valid_when" => layout.valid_when, + "lsb_order" => true, + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### UnmaskedArray ######################################################## struct UnmaskedArray{CONTENT<:Content,BEHAVIOR} <: OptionType{BEHAVIOR} @@ -1812,6 +2045,20 @@ function push_dummy!(layout::UnmaskedArray) push_dummy!(layout.content) end +function _to_buffers!( + layout::UnmaskedArray{CONTENT}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {CONTENT<:Content} + number[begin] += 1 + + Dict{String,Any}( + "class" => "UnmaskedArray", + "content" => _to_buffers!(layout.content, number, containers), + "parameters" => _to_buffers_parameters(layout), + ) +end + ### UnionArray ########################################################### struct UnionArray{ @@ -2014,6 +2261,27 @@ function Base.push!(layout::UnionArray, input) end end +function _to_buffers!( + layout::UnionArray{TAGS,INDEX,CONTENTS}, + number::Vector{Int64}, + containers::Dict{String,AbstractVector{UInt8}}, +) where {TAGS<:Index8,INDEX<:IndexBig,CONTENTS<:Base.Tuple{Vararg{Content}}} + form_key = "node$(number[begin])" + number[begin] += 1 + + containers["$form_key-tags"] = reinterpret(UInt8, layout.tags) + containers["$form_key-index"] = reinterpret(UInt8, layout.index) + + Dict{String,Any}( + "class" => "UnionArray", + "tags" => _to_buffers_index(TAGS), + "index" => _to_buffers_index(INDEX), + "contents" => [_to_buffers!(x, number, containers) for x in layout.contents], + "parameters" => _to_buffers_parameters(layout), + "form_key" => form_key, + ) +end + ### from_iter ############################################################ function layout_for(ItemType) @@ -2042,6 +2310,9 @@ function layout_for(ItemType) TupleArray{Base.Tuple{contents...}} elseif Missing <: ItemType + if ItemType == Any + error("cannot produce an AwkwardArray layout for $ItemType (too generic)") + end OtherTypes = [x for x in Base.uniontypes(ItemType) if x != Missing] if length(OtherTypes) == 0 IndexedOptionArray{Vector{Int64},EmptyArray} @@ -2066,6 +2337,11 @@ function layout_for(ItemType) else OtherTypes = Base.uniontypes(ItemType) if length(OtherTypes) > 1 + if length(OtherTypes) > 127 + error( + "cannot produce a UnionArray with more than 127 possible types: $(length(OtherTypes)) detected", + ) + end contents = [layout_for(x) for x in OtherTypes] UnionArray{Index8,Vector{Int64},Base.Tuple{contents...}} else @@ -2698,7 +2974,7 @@ function _vertical(data::Union{Content,Record,Tuple}, limit_rows::Int, limit_col end -### from_buffers/to_buffers ############################################## +### from_buffers ######################################################### default_buffer_key(form_key::String, attribute::String) = "$form_key-$attribute" @@ -3279,4 +3555,58 @@ function from_buffers( end end # function from_buffers +### to_buffers ########################################################### + +function to_buffers(layout::Content) + containers = Dict{String,AbstractVector{UInt8}}() + number = Vector{Int64}([0]) + form = _to_buffers!(layout, number, containers) + (JSON.json(form), length(layout), containers) +end + +function _to_buffers_parameters(layout::CONTENT) where {BEHAVIOR,CONTENT<:Content{BEHAVIOR}} + out = Dict{String,Any}() + for k in keys(layout.parameters) + out[k] = get_parameter(layout.parameters, k) + end + if isa(layout, PrimitiveArray) + if BEHAVIOR == :char + out["__array__"] = "char" + elseif BEHAVIOR == :byte + out["__array__"] = "byte" + end + elseif isa(layout, ListType) + if BEHAVIOR == :string + out["__array__"] = "string" + elseif BEHAVIOR == :bytestring + out["__array__"] = "bytestring" + elseif BEHAVIOR != :default + out["__list__"] = String(BEHAVIOR) + end + elseif isa(layout, RecordArray) + if BEHAVIOR != :default + out["__record__"] = String(BEHAVIOR) + end + end + out +end + +function _to_buffers_index(IndexType::DataType) + if IndexType <: Index8 + "i8" + elseif IndexType <: AbstractVector{Bool} + "i8" + elseif IndexType <: IndexU8 + "u8" + elseif IndexType <: Index32 + "i32" + elseif IndexType <: IndexU32 + "u32" + elseif IndexType <: Index64 + "i64" + else + error("unexpected INDEX type in to_buffers: $IndexType") + end +end + end # module AwkwardArray diff --git a/test/runtests.jl b/test/runtests.jl index d0f2e61..2d6cc75 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using AwkwardArray +using JSON using Test @testset "AwkwardArray.jl" begin @@ -2487,7 +2488,7 @@ using Test end - ### from_buffers/to_buffers ############################################## + ### from_buffers ######################################################### begin layout = AwkwardArray.from_buffers( @@ -2825,4 +2826,306 @@ using Test @test AwkwardArray.to_vector(layout) == [1.1, 2.2, [3, 4], "five"] end + ### to_buffers ########################################################### + + begin + layout = AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 4.4, 5.5]) + form, len, containers = AwkwardArray.to_buffers(layout) + + @test JSON.parse(form) == JSON.parse( + """{"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node0"}""", + ) + @test len == 5 + @test containers == Dict{String,Vector{UInt8}}( + "node0-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x9a\x99\x99\x99\x99\x99\x11@\x00\x00\x00\x00\x00\x00\x16@", + ), + ) + end + + begin + layout = AwkwardArray.PrimitiveArray( + [1.1, 2.2, 3.3, 4.4, 5.5], + parameters = AwkwardArray.Parameters("what" => "ever"), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {"what": "ever"}, "form_key": "node0"}""", + ) + @test len == 5 + @test containers == Dict{String,Vector{UInt8}}( + "node0-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x9a\x99\x99\x99\x99\x99\x11@\x00\x00\x00\x00\x00\x00\x16@", + ), + ) + end + + begin + layout = AwkwardArray.EmptyArray() + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse("""{"class": "EmptyArray"}""") + @test len == 0 + @test containers == Dict{String,Vector{UInt8}}() + end + + begin + layout = AwkwardArray.from_iter( + Vector{Vector{Float64}}([[1.1, 2.2, 3.3], [], [4.4, 5.5]]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "ListOffsetArray", "offsets": "i64", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 3 + @test containers == Dict{String,Vector{UInt8}}( + "node0-offsets" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x9a\x99\x99\x99\x99\x99\x11@\x00\x00\x00\x00\x00\x00\x16@", + ), + ) + end + + begin + layout = AwkwardArray.from_iter(["one", "two", "three"]) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "ListOffsetArray", "offsets": "i64", "content": {"class": "NumpyArray", "primitive": "uint8", "inner_shape": [], "parameters": {"__array__": "char"}, "form_key": "node1"}, "parameters": {"__array__": "string"}, "form_key": "node0"}""", + ) + @test len == 3 + @test containers == Dict{String,Vector{UInt8}}( + "node0-offsets" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}(b"onetwothree"), + ) + end + + begin + layout = AwkwardArray.ListArray( + [0, 3, 3], + [3, 3, 5], + AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 4.4, 5.5]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "ListArray", "starts": "i64", "stops": "i64", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 3 + @test containers == Dict{String,Vector{UInt8}}( + "node0-starts" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00", + ), + "node0-stops" => Vector{UInt8}( + b"\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x9a\x99\x99\x99\x99\x99\x11@\x00\x00\x00\x00\x00\x00\x16@", + ), + ) + end + + begin + layout = AwkwardArray.RegularArray( + AwkwardArray.RegularArray(AwkwardArray.PrimitiveArray(0:29), 5), + 3, + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "RegularArray", "size": 3, "content": {"class": "RegularArray", "size": 5, "content": {"class": "NumpyArray", "primitive": "int64", "inner_shape": [], "parameters": {}, "form_key": "node2"}, "parameters": {}}, "parameters": {}}""", + ) + @test len == 2 + @test containers == Dict{String,Vector{UInt8}}( + "node2-data" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00\x12\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x15\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00\x00\x00\x00\x00\x00\x17\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00\x00\x00\x00\x00\x1a\x00\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x00\x00\x00\x1d\x00\x00\x00\x00\x00\x00\x00", + ), + ) + end + + begin + layout = + AwkwardArray.from_iter([(x = 1, y = 1.1), (x = 2, y = 2.2), (x = 3, y = 3.3)]) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "RecordArray", "fields": ["x", "y"], "contents": [{"class": "NumpyArray", "primitive": "int64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node2"}], "parameters": {}}""", + ) + @test len == 3 + @test containers == Dict{String,Vector{UInt8}}( + "node1-data" => Vector{UInt8}( + b"\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00", + ), + "node2-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@", + ), + ) + end + + begin + layout = AwkwardArray.from_iter([(1, 1.1), (2, 2.2), (3, 3.3)]) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "RecordArray", "fields": null, "contents": [{"class": "NumpyArray", "primitive": "int64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node2"}], "parameters": {}}""", + ) + @test len == 3 + @test containers == Dict{String,Vector{UInt8}}( + "node1-data" => Vector{UInt8}( + b"\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00", + ), + "node2-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@", + ), + ) + end + + begin + layout = AwkwardArray.IndexedArray( + [3, 0, 0, 1, 3, 1, 2], + AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 9.9]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "IndexedArray", "index": "i64", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 7 + @test containers == Dict{String,Vector{UInt8}}( + "node0-index" => Vector{UInt8}( + b"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\xcd\xcc\xcc\xcc\xcc\xcc#@", + ), + ) + end + + begin + layout = AwkwardArray.IndexedOptionArray( + [3, 0, 0, -1, -1, 1, 2], + AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 9.9]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "IndexedOptionArray", "index": "i64", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 7 + @test containers == Dict{String,Vector{UInt8}}( + "node0-index" => Vector{UInt8}( + b"\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\xcd\xcc\xcc\xcc\xcc\xcc#@", + ), + ) + end + + begin + layout = AwkwardArray.ByteMaskedArray( + [false, false, false, true, true, false], + AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 99999, 99999, 9.9]), + valid_when = false, + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "ByteMaskedArray", "mask": "i8", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "valid_when": false, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 6 + @test containers == Dict{String,Vector{UInt8}}( + "node0-mask" => Vector{UInt8}(b"\x00\x00\x00\x01\x01\x00"), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\xf0i\xf8@\x00\x00\x00\x00\xf0i\xf8@\xcd\xcc\xcc\xcc\xcc\xcc#@", + ), + ) + end + + begin + layout = AwkwardArray.BitMaskedArray( + BitVector([ + false, + false, + false, + true, + true, + false, + false, + false, + false, + true, + true, + false, + ]), + AwkwardArray.PrimitiveArray([ + 1.1, + 2.2, + 3.3, + 99999, + 99999, + 9.9, + 1.1, + 2.2, + 3.3, + 99999, + 99999, + 9.9, + ]), + valid_when = false, + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "BitMaskedArray", "mask": "u8", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "valid_when": false, "lsb_order": true, "parameters": {}, "form_key": "node0"}""", + ) + @test len == 12 + @test containers == Dict{String,Vector{UInt8}}( + "node0-mask" => Vector{UInt8}(b"\x18\x06"), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\xf0i\xf8@\x00\x00\x00\x00\xf0i\xf8@\xcd\xcc\xcc\xcc\xcc\xcc#@\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\xf0i\xf8@\x00\x00\x00\x00\xf0i\xf8@\xcd\xcc\xcc\xcc\xcc\xcc#@", + ), + ) + end + + begin + layout = AwkwardArray.UnmaskedArray( + AwkwardArray.PrimitiveArray([1.1, 2.2, 3.3, 99999, 99999, 9.9]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "UnmaskedArray", "content": {"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, "parameters": {}}""", + ) + @test len == 6 + @test containers == Dict{String,Vector{UInt8}}( + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\xf0i\xf8@\x00\x00\x00\x00\xf0i\xf8@\xcd\xcc\xcc\xcc\xcc\xcc#@", + ), + ) + end + + begin + layout = AwkwardArray.from_iter( + Vector{Union{Float64,Vector{Int64},String}}([1.1, 2.2, [3, 4], "five"]), + ) + form, len, containers = AwkwardArray.to_buffers(layout) + @test JSON.parse(form) == JSON.parse( + """{"class": "UnionArray", "tags": "i8", "index": "i64", "contents": [{"class": "NumpyArray", "primitive": "float64", "inner_shape": [], "parameters": {}, "form_key": "node1"}, {"class": "ListOffsetArray", "offsets": "i64", "content": {"class": "NumpyArray", "primitive": "int64", "inner_shape": [], "parameters": {}, "form_key": "node3"}, "parameters": {}, "form_key": "node2"}, {"class": "ListOffsetArray", "offsets": "i64", "content": {"class": "NumpyArray", "primitive": "uint8", "inner_shape": [], "parameters": {"__array__": "char"}, "form_key": "node5"}, "parameters": {"__array__": "string"}, "form_key": "node4"}], "parameters": {}, "form_key": "node0"}""", + ) + @test len == 4 + @test containers == Dict{String,Vector{UInt8}}( + "node0-tags" => Vector{UInt8}(b"\x00\x00\x01\x02"), + "node0-index" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + ), + "node1-data" => Vector{UInt8}( + b"\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@", + ), + "node2-offsets" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", + ), + "node3-data" => Vector{UInt8}( + b"\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00", + ), + "node4-offsets" => Vector{UInt8}( + b"\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00", + ), + "node5-data" => Vector{UInt8}(b"five"), + ) + end + end # @testset "AwkwardArray.jl"