From ce3c1e616ec6c5e320d99f071eaf7490e4f68800 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Wed, 1 Jun 2022 12:09:41 +1200 Subject: [PATCH 1/3] Allow updating key-value metadata after writer creation --- cpp/KeyValueMetadata.cpp | 21 +++- csharp.test/TestKeyValueMetadata.cs | 154 ++++++++++++++++++++++++++++ csharp/KeyValueMetadata.cs | 24 ++++- csharp/ParquetFileWriter.cs | 81 +++++++++------ 4 files changed, 246 insertions(+), 34 deletions(-) create mode 100644 csharp.test/TestKeyValueMetadata.cs diff --git a/cpp/KeyValueMetadata.cpp b/cpp/KeyValueMetadata.cpp index 038f5a42..f75edb38 100644 --- a/cpp/KeyValueMetadata.cpp +++ b/cpp/KeyValueMetadata.cpp @@ -9,7 +9,7 @@ using namespace parquet; extern "C" { - PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Make(const int64_t size, const char** keys, const char** values, std::shared_ptr** key_value_metadata) + PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Make(const int64_t size, const char** keys, const char** values, std::shared_ptr** key_value_metadata) { TRYCATCH ( @@ -22,10 +22,15 @@ extern "C" values_vector[i] = values[i]; } - *key_value_metadata = new std::shared_ptr(new KeyValueMetadata(keys_vector, values_vector)); + *key_value_metadata = new std::shared_ptr(new KeyValueMetadata(keys_vector, values_vector)); ) } + PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_MakeEmpty(std::shared_ptr** key_value_metadata) + { + TRYCATCH(*key_value_metadata = new std::shared_ptr(new KeyValueMetadata());) + } + PARQUETSHARP_EXPORT void KeyValueMetadata_Free(const std::shared_ptr* key_value_metadata) { delete key_value_metadata; @@ -36,6 +41,18 @@ extern "C" TRYCATCH(*size = (*key_value_metadata)->size();) } + PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Set(const std::shared_ptr* key_value_metadata, const char* key, const char* value) + { + TRYCATCH + ( + ::arrow::Status status = (*key_value_metadata)->Set(key, value); + if (!status.ok()) { + std::string code = status.CodeAsString(); + return new ExceptionInfo(code.c_str(), status.message().c_str()); + } + ) + } + PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Get_Entries(const std::shared_ptr* key_value_metadata, const char*** keys, const char*** values) { TRYCATCH diff --git a/csharp.test/TestKeyValueMetadata.cs b/csharp.test/TestKeyValueMetadata.cs new file mode 100644 index 00000000..056ed20a --- /dev/null +++ b/csharp.test/TestKeyValueMetadata.cs @@ -0,0 +1,154 @@ +using System.Collections.Generic; +using System.Linq; +using ParquetSharp.IO; +using NUnit.Framework; + +namespace ParquetSharp.Test +{ + [TestFixture] + internal static class TestKeyValueMetadata + { + [Test] + public static void TestSpecifyingKeyValueMetadataUpFront() + { + var columns = new Column[] {new Column("values")}; + var values = Enumerable.Range(0, 100).ToArray(); + + var expectedKeyValueMetadata = new Dictionary + { + {"key1", "value1"}, + {"key2", "value2"}, + }; + + using var buffer = new ResizableBuffer(); + using (var output = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: expectedKeyValueMetadata); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using var colWriter = rowGroupWriter.Column(0).LogicalWriter(); + colWriter.WriteBatch(values); + fileWriter.Close(); + } + + using var input = new BufferReader(buffer); + using var fileReader = new ParquetFileReader(input); + var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + + Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + + fileReader.Close(); + } + + [Test] + public static void TestSpecifyingKeyValueMetadataAfterWritingData() + { + var columns = new Column[] {new Column("values")}; + var values = Enumerable.Range(0, 100).ToArray(); + + var expectedKeyValueMetadata = new Dictionary + { + {"key1", "value1"}, + {"key2", "value2"}, + }; + + using var buffer = new ResizableBuffer(); + using (var output = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(output, columns); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using (var colWriter = rowGroupWriter.Column(0).LogicalWriter()) + { + colWriter.WriteBatch(values); + } + + fileWriter.UpdateKeyValueMetadata(expectedKeyValueMetadata); + + fileWriter.Close(); + } + + using var input = new BufferReader(buffer); + using var fileReader = new ParquetFileReader(input); + var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + + Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + + fileReader.Close(); + } + + [Test] + public static void TestUpdatingKeyValueMetadata() + { + var columns = new Column[] {new Column("values")}; + var values = Enumerable.Range(0, 100).ToArray(); + + var initialKeyValueMetadata = new Dictionary + { + {"key1", "value1"}, + {"key2", "value2"}, + }; + var keyValueMetadataUpdate = new Dictionary + { + {"key1", "override1"}, + {"key3", "value3"}, + }; + var expectedKeyValueMetadata = new Dictionary + { + {"key1", "override1"}, + {"key2", "value2"}, + {"key3", "value3"}, + }; + + using var buffer = new ResizableBuffer(); + using (var output = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: initialKeyValueMetadata); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using (var colWriter = rowGroupWriter.Column(0).LogicalWriter()) + { + colWriter.WriteBatch(values); + } + + fileWriter.UpdateKeyValueMetadata(keyValueMetadataUpdate); + + fileWriter.Close(); + } + + using var input = new BufferReader(buffer); + using var fileReader = new ParquetFileReader(input); + var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + + Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + + fileReader.Close(); + } + + [Test] + public static void TestNoMetadata() + { + var columns = new Column[] {new Column("values")}; + var values = Enumerable.Range(0, 100).ToArray(); + + using var buffer = new ResizableBuffer(); + using (var output = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(output, columns); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using var colWriter = rowGroupWriter.Column(0).LogicalWriter(); + colWriter.WriteBatch(values); + fileWriter.Close(); + } + + using var input = new BufferReader(buffer); + using var fileReader = new ParquetFileReader(input); + var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + + Assert.That(keyValueMetadata, Is.Empty); + + fileReader.Close(); + } + } +} diff --git a/csharp/KeyValueMetadata.cs b/csharp/KeyValueMetadata.cs index 04a96e16..bc9c1d0c 100644 --- a/csharp/KeyValueMetadata.cs +++ b/csharp/KeyValueMetadata.cs @@ -6,8 +6,8 @@ namespace ParquetSharp { internal sealed class KeyValueMetadata : IDisposable { - public KeyValueMetadata(IReadOnlyDictionary keyValueMetadata) - : this(Make(keyValueMetadata)) + public KeyValueMetadata(IReadOnlyDictionary? keyValueMetadata) + : this(keyValueMetadata == null ? MakeEmpty() : Make(keyValueMetadata)) { } @@ -23,6 +23,14 @@ public void Dispose() public long Size => ExceptionInfo.Return(Handle, KeyValueMetadata_Size); + public void Set(string key, string value) + { + using var byteBuffer = new ByteBuffer(1024); + var keyPtr = StringUtil.ToCStringUtf8(key, byteBuffer); + var valuePtr = StringUtil.ToCStringUtf8(value, byteBuffer); + ExceptionInfo.Check(KeyValueMetadata_Set(Handle.IntPtr, keyPtr, valuePtr)); + } + public unsafe IReadOnlyDictionary ToDictionary() { ExceptionInfo.Check(KeyValueMetadata_Get_Entries(Handle.IntPtr, out var keys, out var values)); @@ -73,15 +81,27 @@ private static unsafe IntPtr Make(IReadOnlyDictionary keyValueMe } } + private static IntPtr MakeEmpty() + { + ExceptionInfo.Check(KeyValueMetadata_MakeEmpty(out var handle)); + return handle; + } + [DllImport(ParquetDll.Name)] private static extern IntPtr KeyValueMetadata_Make(long size, IntPtr keys, IntPtr values, out IntPtr keyValueMetadata); + [DllImport(ParquetDll.Name)] + private static extern IntPtr KeyValueMetadata_MakeEmpty(out IntPtr keyValueMetadata); + [DllImport(ParquetDll.Name)] private static extern void KeyValueMetadata_Free(IntPtr keyValueMetadata); [DllImport(ParquetDll.Name)] private static extern IntPtr KeyValueMetadata_Size(IntPtr keyValueMetadata, out long size); + [DllImport(ParquetDll.Name)] + private static extern IntPtr KeyValueMetadata_Set(IntPtr keyValueMetadata, IntPtr key, IntPtr value); + [DllImport(ParquetDll.Name)] private static extern IntPtr KeyValueMetadata_Get_Entries(IntPtr keyValueMetadata, out IntPtr keys, out IntPtr values); diff --git a/csharp/ParquetFileWriter.cs b/csharp/ParquetFileWriter.cs index 25b5a949..86ba278f 100644 --- a/csharp/ParquetFileWriter.cs +++ b/csharp/ParquetFileWriter.cs @@ -16,7 +16,8 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns); using var writerProperties = CreateWriterProperties(compression); - _handle = CreateParquetFileWriter(path, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -28,7 +29,8 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns); using var writerProperties = CreateWriterProperties(compression); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -41,7 +43,8 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); using var writerProperties = CreateWriterProperties(compression); - _handle = CreateParquetFileWriter(path, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -54,7 +57,8 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); using var writerProperties = CreateWriterProperties(compression); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -65,7 +69,8 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns); - _handle = CreateParquetFileWriter(path, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -76,7 +81,8 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -88,7 +94,8 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); - _handle = CreateParquetFileWriter(path, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -100,7 +107,8 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); Columns = columns; } @@ -110,7 +118,8 @@ public ParquetFileWriter( WriterProperties writerProperties, IReadOnlyDictionary? keyValueMetadata = null) { - _handle = CreateParquetFileWriter(path, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); Columns = null; } @@ -120,7 +129,8 @@ public ParquetFileWriter( WriterProperties writerProperties, IReadOnlyDictionary? keyValueMetadata = null) { - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, keyValueMetadata); + _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); Columns = null; } @@ -132,6 +142,7 @@ public void Dispose() // // See https://github.com/G-Research/ParquetSharp/issues/104. + _keyValueMetadata.Dispose(); _fileMetaData?.Dispose(); _handle.Dispose(); } @@ -157,6 +168,21 @@ public RowGroupWriter AppendBufferedRowGroup() return new(ExceptionInfo.Return(_handle, ParquetFileWriter_AppendBufferedRowGroup), this); } + /// + /// Update the key-value metadata with new metadata entries. + /// This can be called any time before the writer is closed so that the updated metadata will + /// be written in the file footer. + /// Where the metadata key matches an existing key, values will be overridden, + /// otherwise a new entry is added. + /// + public void UpdateKeyValueMetadata(IReadOnlyDictionary keyValueMetadata) + { + foreach (var kvp in keyValueMetadata) + { + _keyValueMetadata.Set(kvp.Key, kvp.Value); + } + } + internal int NumColumns => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Columns); // 2021-04-08: calling this results in a segfault when the writer has been closed internal long NumRows => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Rows); // 2021-04-08: calling this results in a segfault when the writer has been closed internal int NumRowGroups => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Row_Groups); // 2021-04-08: calling this results in a segfault when the writer has been closed @@ -200,47 +226,41 @@ private static ParquetHandle CreateParquetFileWriter( string path, GroupNode schema, WriterProperties writerProperties, - IReadOnlyDictionary? keyValueMetadata) + KeyValueMetadata keyValueMetadata) { if (path == null) throw new ArgumentNullException(nameof(path)); if (schema == null) throw new ArgumentNullException(nameof(schema)); if (writerProperties == null) throw new ArgumentNullException(nameof(writerProperties)); - using (var kvm = keyValueMetadata == null ? null : new KeyValueMetadata(keyValueMetadata)) - { - ExceptionInfo.Check(ParquetFileWriter_OpenFile( - path, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, kvm?.Handle.IntPtr ?? IntPtr.Zero, out var writer)); + ExceptionInfo.Check(ParquetFileWriter_OpenFile( + path, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata.Handle.IntPtr, out var writer)); - // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in OpenFile(). - GC.KeepAlive(schema); - GC.KeepAlive(writerProperties); + // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in OpenFile(). + GC.KeepAlive(schema); + GC.KeepAlive(writerProperties); - return new ParquetHandle(writer, ParquetFileWriter_Free); - } + return new ParquetHandle(writer, ParquetFileWriter_Free); } private static ParquetHandle CreateParquetFileWriter( OutputStream outputStream, GroupNode schema, WriterProperties writerProperties, - IReadOnlyDictionary? keyValueMetadata) + KeyValueMetadata keyValueMetadata) { if (outputStream == null) throw new ArgumentNullException(nameof(outputStream)); if (outputStream.Handle == null) throw new ArgumentNullException(nameof(outputStream.Handle)); if (schema == null) throw new ArgumentNullException(nameof(schema)); if (writerProperties == null) throw new ArgumentNullException(nameof(writerProperties)); - using (var kvm = keyValueMetadata == null ? null : new KeyValueMetadata(keyValueMetadata)) - { - ExceptionInfo.Check(ParquetFileWriter_Open( - outputStream.Handle.IntPtr, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, kvm?.Handle.IntPtr ?? IntPtr.Zero, out var writer)); + ExceptionInfo.Check(ParquetFileWriter_Open( + outputStream.Handle.IntPtr, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata.Handle.IntPtr, out var writer)); - // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in Open(). - GC.KeepAlive(schema); - GC.KeepAlive(writerProperties); + // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in Open(). + GC.KeepAlive(schema); + GC.KeepAlive(writerProperties); - return new ParquetHandle(writer, ParquetFileWriter_Free); - } + return new ParquetHandle(writer, ParquetFileWriter_Free); } private static WriterProperties CreateWriterProperties(Compression compression) @@ -293,6 +313,7 @@ private static WriterProperties CreateWriterProperties(Compression compression) private static extern IntPtr ParquetFileWriter_Metadata(IntPtr writer, out IntPtr metadata); private readonly ParquetHandle _handle; + private readonly KeyValueMetadata _keyValueMetadata; internal readonly Column[]? Columns; private FileMetaData? _fileMetaData; private WriterProperties? _writerProperties; From c4779d64b395a6bf0026e0e76d5b7c55d12c787b Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 13 Jun 2022 13:10:12 +1200 Subject: [PATCH 2/3] Change approach to delaying setting key value metadata until writer is closed --- cpp/KeyValueMetadata.cpp | 28 +----- cpp/ParquetFileWriter.cpp | 9 -- csharp.test/TestKeyValueMetadata.cs | 58 +++++++++-- csharp/KeyValueMetadata.cs | 44 ++------- csharp/ParquetFileWriter.cs | 148 +++++++++++++++++++--------- 5 files changed, 162 insertions(+), 125 deletions(-) diff --git a/cpp/KeyValueMetadata.cpp b/cpp/KeyValueMetadata.cpp index f75edb38..15fedcf1 100644 --- a/cpp/KeyValueMetadata.cpp +++ b/cpp/KeyValueMetadata.cpp @@ -9,23 +9,6 @@ using namespace parquet; extern "C" { - PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Make(const int64_t size, const char** keys, const char** values, std::shared_ptr** key_value_metadata) - { - TRYCATCH - ( - std::vector keys_vector(size); - std::vector values_vector(size); - - for (int64_t i = 0; i != size; ++i) - { - keys_vector[i] = keys[i]; - values_vector[i] = values[i]; - } - - *key_value_metadata = new std::shared_ptr(new KeyValueMetadata(keys_vector, values_vector)); - ) - } - PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_MakeEmpty(std::shared_ptr** key_value_metadata) { TRYCATCH(*key_value_metadata = new std::shared_ptr(new KeyValueMetadata());) @@ -41,15 +24,10 @@ extern "C" TRYCATCH(*size = (*key_value_metadata)->size();) } - PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Set(const std::shared_ptr* key_value_metadata, const char* key, const char* value) + PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Append(const std::shared_ptr* key_value_metadata, const char* key, const char* value) { - TRYCATCH - ( - ::arrow::Status status = (*key_value_metadata)->Set(key, value); - if (!status.ok()) { - std::string code = status.CodeAsString(); - return new ExceptionInfo(code.c_str(), status.message().c_str()); - } + TRYCATCH( + (*key_value_metadata)->Append(key, value); ) } diff --git a/cpp/ParquetFileWriter.cpp b/cpp/ParquetFileWriter.cpp index f4f06f3d..c92617f9 100644 --- a/cpp/ParquetFileWriter.cpp +++ b/cpp/ParquetFileWriter.cpp @@ -86,15 +86,6 @@ extern "C" TRYCATCH(*descr = writer->descr(i);) } - PARQUETSHARP_EXPORT ExceptionInfo* ParquetFileWriter_Key_Value_Metadata(ParquetFileWriter* writer, const std::shared_ptr** key_value_metadata) - { - TRYCATCH - ( - const auto& m = writer->key_value_metadata(); - *key_value_metadata = m ? new std::shared_ptr(m) : nullptr; - ) - } - PARQUETSHARP_EXPORT ExceptionInfo* ParquetFileWriter_Metadata(ParquetFileWriter* writer, const std::shared_ptr** metadata) { TRYCATCH diff --git a/csharp.test/TestKeyValueMetadata.cs b/csharp.test/TestKeyValueMetadata.cs index 056ed20a..0ba29390 100644 --- a/csharp.test/TestKeyValueMetadata.cs +++ b/csharp.test/TestKeyValueMetadata.cs @@ -46,6 +46,8 @@ public static void TestSpecifyingKeyValueMetadataAfterWritingData() var columns = new Column[] {new Column("values")}; var values = Enumerable.Range(0, 100).ToArray(); + var keyValueMetadata = new Dictionary(); + var expectedKeyValueMetadata = new Dictionary { {"key1", "value1"}, @@ -55,7 +57,7 @@ public static void TestSpecifyingKeyValueMetadataAfterWritingData() using var buffer = new ResizableBuffer(); using (var output = new BufferOutputStream(buffer)) { - using var fileWriter = new ParquetFileWriter(output, columns); + using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: keyValueMetadata); using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); using (var colWriter = rowGroupWriter.Column(0).LogicalWriter()) @@ -63,16 +65,19 @@ public static void TestSpecifyingKeyValueMetadataAfterWritingData() colWriter.WriteBatch(values); } - fileWriter.UpdateKeyValueMetadata(expectedKeyValueMetadata); + foreach (var kvp in expectedKeyValueMetadata) + { + keyValueMetadata[kvp.Key] = kvp.Value; + } fileWriter.Close(); } using var input = new BufferReader(buffer); using var fileReader = new ParquetFileReader(input); - var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + var readKeyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; - Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + Assert.That(readKeyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); fileReader.Close(); } @@ -83,7 +88,7 @@ public static void TestUpdatingKeyValueMetadata() var columns = new Column[] {new Column("values")}; var values = Enumerable.Range(0, 100).ToArray(); - var initialKeyValueMetadata = new Dictionary + var keyValueMetadata = new Dictionary { {"key1", "value1"}, {"key2", "value2"}, @@ -103,7 +108,7 @@ public static void TestUpdatingKeyValueMetadata() using var buffer = new ResizableBuffer(); using (var output = new BufferOutputStream(buffer)) { - using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: initialKeyValueMetadata); + using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: keyValueMetadata); using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); using (var colWriter = rowGroupWriter.Column(0).LogicalWriter()) @@ -111,16 +116,19 @@ public static void TestUpdatingKeyValueMetadata() colWriter.WriteBatch(values); } - fileWriter.UpdateKeyValueMetadata(keyValueMetadataUpdate); + foreach (var kvp in keyValueMetadataUpdate) + { + keyValueMetadata[kvp.Key] = kvp.Value; + } fileWriter.Close(); } using var input = new BufferReader(buffer); using var fileReader = new ParquetFileReader(input); - var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + var readKeyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; - Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + Assert.That(readKeyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); fileReader.Close(); } @@ -150,5 +158,37 @@ public static void TestNoMetadata() fileReader.Close(); } + + [Test] + public static void TestWriterNotClosed() + { + var columns = new Column[] {new Column("values")}; + var values = Enumerable.Range(0, 100).ToArray(); + + var expectedKeyValueMetadata = new Dictionary + { + {"key1", "value1"}, + {"key2", "value2"}, + }; + + using var buffer = new ResizableBuffer(); + using (var output = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(output, columns, keyValueMetadata: expectedKeyValueMetadata); + using var rowGroupWriter = fileWriter.AppendBufferedRowGroup(); + + using var colWriter = rowGroupWriter.Column(0).LogicalWriter(); + colWriter.WriteBatch(values); + // Don't close, rely on Dispose + } + + using var input = new BufferReader(buffer); + using var fileReader = new ParquetFileReader(input); + var keyValueMetadata = fileReader.FileMetaData.KeyValueMetadata; + + Assert.That(keyValueMetadata, Is.EqualTo(expectedKeyValueMetadata)); + + fileReader.Close(); + } } } diff --git a/csharp/KeyValueMetadata.cs b/csharp/KeyValueMetadata.cs index bc9c1d0c..61b1e9f2 100644 --- a/csharp/KeyValueMetadata.cs +++ b/csharp/KeyValueMetadata.cs @@ -6,8 +6,7 @@ namespace ParquetSharp { internal sealed class KeyValueMetadata : IDisposable { - public KeyValueMetadata(IReadOnlyDictionary? keyValueMetadata) - : this(keyValueMetadata == null ? MakeEmpty() : Make(keyValueMetadata)) + public KeyValueMetadata() : this(MakeEmpty()) { } @@ -21,14 +20,17 @@ public void Dispose() Handle.Dispose(); } - public long Size => ExceptionInfo.Return(Handle, KeyValueMetadata_Size); + private long Size => ExceptionInfo.Return(Handle, KeyValueMetadata_Size); - public void Set(string key, string value) + public void SetData(IReadOnlyDictionary keyValueMetadata) { using var byteBuffer = new ByteBuffer(1024); - var keyPtr = StringUtil.ToCStringUtf8(key, byteBuffer); - var valuePtr = StringUtil.ToCStringUtf8(value, byteBuffer); - ExceptionInfo.Check(KeyValueMetadata_Set(Handle.IntPtr, keyPtr, valuePtr)); + foreach (var entry in keyValueMetadata) + { + var keyPtr = StringUtil.ToCStringUtf8(entry.Key, byteBuffer); + var valuePtr = StringUtil.ToCStringUtf8(entry.Value, byteBuffer); + ExceptionInfo.Check(KeyValueMetadata_Append(Handle.IntPtr, keyPtr, valuePtr)); + } } public unsafe IReadOnlyDictionary ToDictionary() @@ -58,38 +60,12 @@ public unsafe IReadOnlyDictionary ToDictionary() } } - private static unsafe IntPtr Make(IReadOnlyDictionary keyValueMetadata) - { - using var byteBuffer = new ByteBuffer(1024); - var keys = new IntPtr[keyValueMetadata.Count]; - var values = new IntPtr[keyValueMetadata.Count]; - var i = 0; - - foreach (var entry in keyValueMetadata) - { - keys[i] = StringUtil.ToCStringUtf8(entry.Key, byteBuffer); - values[i] = StringUtil.ToCStringUtf8(entry.Value, byteBuffer); - - ++i; - } - - fixed (IntPtr* pKeys = keys) - fixed (IntPtr* pValues = values) - { - ExceptionInfo.Check(KeyValueMetadata_Make(values.Length, new IntPtr(pKeys), new IntPtr(pValues), out var handle)); - return handle; - } - } - private static IntPtr MakeEmpty() { ExceptionInfo.Check(KeyValueMetadata_MakeEmpty(out var handle)); return handle; } - [DllImport(ParquetDll.Name)] - private static extern IntPtr KeyValueMetadata_Make(long size, IntPtr keys, IntPtr values, out IntPtr keyValueMetadata); - [DllImport(ParquetDll.Name)] private static extern IntPtr KeyValueMetadata_MakeEmpty(out IntPtr keyValueMetadata); @@ -100,7 +76,7 @@ private static IntPtr MakeEmpty() private static extern IntPtr KeyValueMetadata_Size(IntPtr keyValueMetadata, out long size); [DllImport(ParquetDll.Name)] - private static extern IntPtr KeyValueMetadata_Set(IntPtr keyValueMetadata, IntPtr key, IntPtr value); + private static extern IntPtr KeyValueMetadata_Append(IntPtr keyValueMetadata, IntPtr key, IntPtr value); [DllImport(ParquetDll.Name)] private static extern IntPtr KeyValueMetadata_Get_Entries(IntPtr keyValueMetadata, out IntPtr keys, out IntPtr values); diff --git a/csharp/ParquetFileWriter.cs b/csharp/ParquetFileWriter.cs index 86ba278f..6724bf76 100644 --- a/csharp/ParquetFileWriter.cs +++ b/csharp/ParquetFileWriter.cs @@ -16,8 +16,12 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns); using var writerProperties = CreateWriterProperties(compression); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(path, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -29,8 +33,12 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns); using var writerProperties = CreateWriterProperties(compression); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -43,8 +51,12 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); using var writerProperties = CreateWriterProperties(compression); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(path, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -57,8 +69,12 @@ public ParquetFileWriter( { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); using var writerProperties = CreateWriterProperties(compression); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -69,8 +85,12 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(path, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -81,8 +101,12 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -94,8 +118,12 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(path, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -107,8 +135,12 @@ public ParquetFileWriter( IReadOnlyDictionary? keyValueMetadata = null) { using var schema = Column.CreateSchemaNode(columns, LogicalTypeFactory = logicalTypeFactory ?? LogicalTypeFactory.Default); - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _parquetKeyValueMetadata); Columns = columns; } @@ -118,8 +150,12 @@ public ParquetFileWriter( WriterProperties writerProperties, IReadOnlyDictionary? keyValueMetadata = null) { - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(path, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(path, schema, writerProperties, _parquetKeyValueMetadata); Columns = null; } @@ -129,8 +165,12 @@ public ParquetFileWriter( WriterProperties writerProperties, IReadOnlyDictionary? keyValueMetadata = null) { - _keyValueMetadata = new KeyValueMetadata(keyValueMetadata); - _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _keyValueMetadata); + if (keyValueMetadata != null) + { + _keyValueMetadata = keyValueMetadata; + _parquetKeyValueMetadata = new KeyValueMetadata(); + } + _handle = CreateParquetFileWriter(outputStream, schema, writerProperties, _parquetKeyValueMetadata); Columns = null; } @@ -142,7 +182,9 @@ public void Dispose() // // See https://github.com/G-Research/ParquetSharp/issues/104. - _keyValueMetadata.Dispose(); + // In case a user hasn't called close, make sure we set key-value metadata before the file is closed internally + SetKeyValueMetadata(); + _parquetKeyValueMetadata?.Dispose(); _fileMetaData?.Dispose(); _handle.Dispose(); } @@ -154,6 +196,7 @@ public void Dispose() /// public void Close() { + SetKeyValueMetadata(); ExceptionInfo.Check(ParquetFileWriter_Close(_handle.IntPtr)); GC.KeepAlive(_handle); } @@ -168,21 +211,6 @@ public RowGroupWriter AppendBufferedRowGroup() return new(ExceptionInfo.Return(_handle, ParquetFileWriter_AppendBufferedRowGroup), this); } - /// - /// Update the key-value metadata with new metadata entries. - /// This can be called any time before the writer is closed so that the updated metadata will - /// be written in the file footer. - /// Where the metadata key matches an existing key, values will be overridden, - /// otherwise a new entry is added. - /// - public void UpdateKeyValueMetadata(IReadOnlyDictionary keyValueMetadata) - { - foreach (var kvp in keyValueMetadata) - { - _keyValueMetadata.Set(kvp.Key, kvp.Value); - } - } - internal int NumColumns => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Columns); // 2021-04-08: calling this results in a segfault when the writer has been closed internal long NumRows => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Rows); // 2021-04-08: calling this results in a segfault when the writer has been closed internal int NumRowGroups => ExceptionInfo.Return(_handle, ParquetFileWriter_Num_Row_Groups); // 2021-04-08: calling this results in a segfault when the writer has been closed @@ -193,18 +221,24 @@ public void UpdateKeyValueMetadata(IReadOnlyDictionary keyValueM public SchemaDescriptor Schema => new(ExceptionInfo.Return(_handle, ParquetFileWriter_Schema)); public ColumnDescriptor ColumnDescriptor(int i) => new(ExceptionInfo.Return(_handle, i, ParquetFileWriter_Descr)); + /// + /// Returns a read-only copy of the current key-value metadata to be written + /// public IReadOnlyDictionary KeyValueMetadata { get { - var kvmHandle = ExceptionInfo.Return(_handle, ParquetFileWriter_Key_Value_Metadata); - if (kvmHandle == IntPtr.Zero) + if (_keyValueMetadata == null) { return new Dictionary(); } - using var keyValueMetadata = new KeyValueMetadata(kvmHandle); - return keyValueMetadata.ToDictionary(); + var metadata = new Dictionary(_keyValueMetadata.Count); + foreach (var kvp in _keyValueMetadata) + { + metadata[kvp.Key] = kvp.Value; + } + return metadata; } } @@ -222,18 +256,37 @@ public FileMetaData? FileMetaData } } + /// + /// Sets Parquet key value metadata by copying values from the key-value metadata dictionary. + /// We delay doing this until the file is closed to allow users to modify the key-value metadata after + /// data is written. + /// + private void SetKeyValueMetadata() + { + if (_keyValueMetadataSet) + { + return; + } + + if (_keyValueMetadata != null && _parquetKeyValueMetadata != null) + { + _parquetKeyValueMetadata.SetData(_keyValueMetadata); + } + _keyValueMetadataSet = true; + } + private static ParquetHandle CreateParquetFileWriter( string path, GroupNode schema, WriterProperties writerProperties, - KeyValueMetadata keyValueMetadata) + KeyValueMetadata? keyValueMetadata) { if (path == null) throw new ArgumentNullException(nameof(path)); if (schema == null) throw new ArgumentNullException(nameof(schema)); if (writerProperties == null) throw new ArgumentNullException(nameof(writerProperties)); ExceptionInfo.Check(ParquetFileWriter_OpenFile( - path, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata.Handle.IntPtr, out var writer)); + path, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata?.Handle.IntPtr ?? IntPtr.Zero, out var writer)); // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in OpenFile(). GC.KeepAlive(schema); @@ -246,7 +299,7 @@ private static ParquetHandle CreateParquetFileWriter( OutputStream outputStream, GroupNode schema, WriterProperties writerProperties, - KeyValueMetadata keyValueMetadata) + KeyValueMetadata? keyValueMetadata) { if (outputStream == null) throw new ArgumentNullException(nameof(outputStream)); if (outputStream.Handle == null) throw new ArgumentNullException(nameof(outputStream.Handle)); @@ -254,7 +307,7 @@ private static ParquetHandle CreateParquetFileWriter( if (writerProperties == null) throw new ArgumentNullException(nameof(writerProperties)); ExceptionInfo.Check(ParquetFileWriter_Open( - outputStream.Handle.IntPtr, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata.Handle.IntPtr, out var writer)); + outputStream.Handle.IntPtr, schema.Handle.IntPtr, writerProperties.Handle.IntPtr, keyValueMetadata?.Handle.IntPtr ?? IntPtr.Zero, out var writer)); // Keep alive schema and writerProperties until this point, otherwise the GC might kick in while we're in Open(). GC.KeepAlive(schema); @@ -306,16 +359,15 @@ private static WriterProperties CreateWriterProperties(Compression compression) [DllImport(ParquetDll.Name)] private static extern IntPtr ParquetFileWriter_Descr(IntPtr writer, int i, out IntPtr descr); - [DllImport(ParquetDll.Name)] - private static extern IntPtr ParquetFileWriter_Key_Value_Metadata(IntPtr writer, out IntPtr keyValueMetadata); - [DllImport(ParquetDll.Name)] private static extern IntPtr ParquetFileWriter_Metadata(IntPtr writer, out IntPtr metadata); private readonly ParquetHandle _handle; - private readonly KeyValueMetadata _keyValueMetadata; + private readonly KeyValueMetadata? _parquetKeyValueMetadata; + private readonly IReadOnlyDictionary? _keyValueMetadata; internal readonly Column[]? Columns; private FileMetaData? _fileMetaData; private WriterProperties? _writerProperties; + private bool _keyValueMetadataSet; } } From cca96c8447bf2d5fec11e93d3ae689fd2d1f06b5 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 13 Jun 2022 13:58:57 +1200 Subject: [PATCH 3/3] Document ParquetFileWriter constructor parameters --- csharp/ParquetFileWriter.cs | 84 +++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/csharp/ParquetFileWriter.cs b/csharp/ParquetFileWriter.cs index 6724bf76..bf908ca5 100644 --- a/csharp/ParquetFileWriter.cs +++ b/csharp/ParquetFileWriter.cs @@ -8,6 +8,14 @@ namespace ParquetSharp { public sealed class ParquetFileWriter : IDisposable { + /// + /// Open a new ParquetFileWriter + /// + /// Location to write to + /// Definitions of columns to be written + /// Compression to use for all columns + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( string path, Column[] columns, @@ -25,6 +33,14 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Stream to write to + /// Definitions of columns to be written + /// Compression to use for all columns + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( OutputStream outputStream, Column[] columns, @@ -42,6 +58,15 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Location to write to + /// Definitions of columns to be written + /// Custom type factory used to map from dotnet types to Parquet types + /// Compression to use for all columns + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( string path, Column[] columns, @@ -60,6 +85,15 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Stream to write to + /// Definitions of columns to be written + /// Custom type factory used to map from dotnet types to Parquet types + /// Compression to use for all columns + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( OutputStream outputStream, Column[] columns, @@ -78,6 +112,14 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Location to write to + /// Definitions of columns to be written + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( string path, Column[] columns, @@ -94,6 +136,14 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Stream to write to + /// Definitions of columns to be written + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( OutputStream outputStream, Column[] columns, @@ -110,6 +160,15 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Location to write to + /// Definitions of columns to be written + /// Custom type factory used to map from dotnet types to Parquet types + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( string path, Column[] columns, @@ -127,6 +186,15 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Stream to write to + /// Definitions of columns to be written + /// Custom type factory used to map from dotnet types to Parquet types + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( OutputStream outputStream, Column[] columns, @@ -144,6 +212,14 @@ public ParquetFileWriter( Columns = columns; } + /// + /// Open a new ParquetFileWriter + /// + /// Location to write to + /// Root schema node defining the structure of the file + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( string path, GroupNode schema, @@ -159,6 +235,14 @@ public ParquetFileWriter( Columns = null; } + /// + /// Open a new ParquetFileWriter + /// + /// Stream to write to + /// Root schema node defining the structure of the file + /// Writer properties to use + /// Optional dictionary of key-value metadata. + /// This isn't read until the file is closed, to allow metadata to be modified after data is written. public ParquetFileWriter( OutputStream outputStream, GroupNode schema,