Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow updating key-value metadata after ParquetFileWriter creation #279

Merged
merged 4 commits into from
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 9 additions & 14 deletions cpp/KeyValueMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,9 @@ using namespace parquet;

extern "C"
{
PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Make(const int64_t size, const char** keys, const char** values, std::shared_ptr<const KeyValueMetadata>** key_value_metadata)
PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_MakeEmpty(std::shared_ptr<KeyValueMetadata>** key_value_metadata)
{
TRYCATCH
(
std::vector<std::string> keys_vector(size);
std::vector<std::string> values_vector(size);

for (int64_t i = 0; i != size; ++i)
{
keys_vector[i] = keys[i];
values_vector[i] = values[i];
}

*key_value_metadata = new std::shared_ptr<const KeyValueMetadata>(new KeyValueMetadata(keys_vector, values_vector));
)
TRYCATCH(*key_value_metadata = new std::shared_ptr<KeyValueMetadata>(new KeyValueMetadata());)
}

PARQUETSHARP_EXPORT void KeyValueMetadata_Free(const std::shared_ptr<const KeyValueMetadata>* key_value_metadata)
Expand All @@ -36,6 +24,13 @@ extern "C"
TRYCATCH(*size = (*key_value_metadata)->size();)
}

// Appends a single key/value pair to the KeyValueMetadata object behind the given handle.
// key and value are forwarded unchanged to KeyValueMetadata::Append.
// The call is wrapped in TRYCATCH (defined elsewhere); presumably that macro turns
// C++ exceptions into the returned ExceptionInfo* -- confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Append(const std::shared_ptr<KeyValueMetadata>* key_value_metadata, const char* key, const char* value)
{
  TRYCATCH((*key_value_metadata)->Append(key, value);)
}

PARQUETSHARP_EXPORT ExceptionInfo* KeyValueMetadata_Get_Entries(const std::shared_ptr<const KeyValueMetadata>* key_value_metadata, const char*** keys, const char*** values)
{
TRYCATCH
Expand Down
9 changes: 0 additions & 9 deletions cpp/ParquetFileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,6 @@ extern "C"
TRYCATCH(*descr = writer->descr(i);)
}

// Exposes the writer's key-value metadata to the caller.
// When the writer holds metadata, *key_value_metadata receives a newly allocated
// shared_ptr copy of it; when it holds none, *key_value_metadata is set to nullptr.
PARQUETSHARP_EXPORT ExceptionInfo* ParquetFileWriter_Key_Value_Metadata(ParquetFileWriter* writer, const std::shared_ptr<const KeyValueMetadata>** key_value_metadata)
{
  TRYCATCH
  (
    const auto& metadata = writer->key_value_metadata();
    if (metadata)
    {
      *key_value_metadata = new std::shared_ptr(metadata);
    }
    else
    {
      *key_value_metadata = nullptr;
    }
  )
}

PARQUETSHARP_EXPORT ExceptionInfo* ParquetFileWriter_Metadata(ParquetFileWriter* writer, const std::shared_ptr<FileMetaData>** metadata)
{
TRYCATCH
Expand Down
194 changes: 194 additions & 0 deletions csharp.test/TestKeyValueMetadata.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
using System.Collections.Generic;
using System.Linq;
using ParquetSharp.IO;
using NUnit.Framework;

namespace ParquetSharp.Test
{
    /// <summary>
    /// Round-trip tests for the key-value metadata passed to <see cref="ParquetFileWriter"/>.
    /// Each test writes a single int column into an in-memory buffer, reads the file back
    /// and checks the key-value metadata reported by the reader.
    /// </summary>
    [TestFixture]
    internal static class TestKeyValueMetadata
    {
        /// <summary>Metadata supplied at construction time is written as-is.</summary>
        [Test]
        public static void TestSpecifyingKeyValueMetadataUpFront()
        {
            var schemaColumns = CreateColumns();
            var data = CreateValues();
            var expectedKeyValueMetadata = CreateTwoEntryMetadata();

            using var buffer = new ResizableBuffer();
            using (var output = new BufferOutputStream(buffer))
            using (var fileWriter = new ParquetFileWriter(output, schemaColumns, keyValueMetadata: expectedKeyValueMetadata))
            using (var rowGroupWriter = fileWriter.AppendBufferedRowGroup())
            using (var columnWriter = rowGroupWriter.Column(0).LogicalWriter<int>())
            {
                columnWriter.WriteBatch(data);
                fileWriter.Close();
            }

            AssertStoredMetadata(buffer, Is.EqualTo(expectedKeyValueMetadata));
        }

        /// <summary>
        /// Entries added to an initially empty dictionary after the column data has been
        /// written, but before the writer is closed, end up in the file.
        /// </summary>
        [Test]
        public static void TestSpecifyingKeyValueMetadataAfterWritingData()
        {
            var schemaColumns = CreateColumns();
            var data = CreateValues();
            var keyValueMetadata = new Dictionary<string, string>();
            var expectedKeyValueMetadata = CreateTwoEntryMetadata();

            using var buffer = new ResizableBuffer();
            using (var output = new BufferOutputStream(buffer))
            using (var fileWriter = new ParquetFileWriter(output, schemaColumns, keyValueMetadata: keyValueMetadata))
            using (var rowGroupWriter = fileWriter.AppendBufferedRowGroup())
            {
                using (var columnWriter = rowGroupWriter.Column(0).LogicalWriter<int>())
                {
                    columnWriter.WriteBatch(data);
                }

                // Populate the dictionary only once the data has been written.
                foreach (var pair in expectedKeyValueMetadata)
                {
                    keyValueMetadata[pair.Key] = pair.Value;
                }

                fileWriter.Close();
            }

            AssertStoredMetadata(buffer, Is.EqualTo(expectedKeyValueMetadata));
        }

        /// <summary>
        /// Overriding an existing key and adding a new one after the column data has been
        /// written are both reflected in the file.
        /// </summary>
        [Test]
        public static void TestUpdatingKeyValueMetadata()
        {
            var schemaColumns = CreateColumns();
            var data = CreateValues();
            var keyValueMetadata = CreateTwoEntryMetadata();
            var keyValueMetadataUpdate = new Dictionary<string, string>
            {
                ["key1"] = "override1",
                ["key3"] = "value3",
            };
            var expectedKeyValueMetadata = new Dictionary<string, string>
            {
                ["key1"] = "override1",
                ["key2"] = "value2",
                ["key3"] = "value3",
            };

            using var buffer = new ResizableBuffer();
            using (var output = new BufferOutputStream(buffer))
            using (var fileWriter = new ParquetFileWriter(output, schemaColumns, keyValueMetadata: keyValueMetadata))
            using (var rowGroupWriter = fileWriter.AppendBufferedRowGroup())
            {
                using (var columnWriter = rowGroupWriter.Column(0).LogicalWriter<int>())
                {
                    columnWriter.WriteBatch(data);
                }

                // Apply the overrides and additions after the data has been written.
                foreach (var pair in keyValueMetadataUpdate)
                {
                    keyValueMetadata[pair.Key] = pair.Value;
                }

                fileWriter.Close();
            }

            AssertStoredMetadata(buffer, Is.EqualTo(expectedKeyValueMetadata));
        }

        /// <summary>A writer created without metadata yields empty metadata on read.</summary>
        [Test]
        public static void TestNoMetadata()
        {
            var schemaColumns = CreateColumns();
            var data = CreateValues();

            using var buffer = new ResizableBuffer();
            using (var output = new BufferOutputStream(buffer))
            using (var fileWriter = new ParquetFileWriter(output, schemaColumns))
            using (var rowGroupWriter = fileWriter.AppendBufferedRowGroup())
            using (var columnWriter = rowGroupWriter.Column(0).LogicalWriter<int>())
            {
                columnWriter.WriteBatch(data);
                fileWriter.Close();
            }

            AssertStoredMetadata(buffer, Is.Empty);
        }

        /// <summary>Metadata is still written when the writer is only disposed, never closed.</summary>
        [Test]
        public static void TestWriterNotClosed()
        {
            var schemaColumns = CreateColumns();
            var data = CreateValues();
            var expectedKeyValueMetadata = CreateTwoEntryMetadata();

            using var buffer = new ResizableBuffer();
            using (var output = new BufferOutputStream(buffer))
            using (var fileWriter = new ParquetFileWriter(output, schemaColumns, keyValueMetadata: expectedKeyValueMetadata))
            using (var rowGroupWriter = fileWriter.AppendBufferedRowGroup())
            using (var columnWriter = rowGroupWriter.Column(0).LogicalWriter<int>())
            {
                columnWriter.WriteBatch(data);
                // Don't close, rely on Dispose
            }

            AssertStoredMetadata(buffer, Is.EqualTo(expectedKeyValueMetadata));
        }

        /// <summary>Schema shared by all tests: one int column named "values".</summary>
        private static Column[] CreateColumns()
        {
            return new Column[] {new Column<int>("values")};
        }

        /// <summary>Column data shared by all tests: the integers 0..99.</summary>
        private static int[] CreateValues()
        {
            return Enumerable.Range(0, 100).ToArray();
        }

        /// <summary>Builds a fresh dictionary holding key1/value1 and key2/value2.</summary>
        private static Dictionary<string, string> CreateTwoEntryMetadata()
        {
            return new Dictionary<string, string>
            {
                ["key1"] = "value1",
                ["key2"] = "value2",
            };
        }

        /// <summary>
        /// Opens the file held in <paramref name="buffer"/>, applies <paramref name="constraint"/>
        /// to the key-value metadata reported by the reader, then closes the reader.
        /// </summary>
        private static void AssertStoredMetadata(ResizableBuffer buffer, NUnit.Framework.Constraints.IResolveConstraint constraint)
        {
            using var input = new BufferReader(buffer);
            using var fileReader = new ParquetFileReader(input);

            Assert.That(fileReader.FileMetaData.KeyValueMetadata, constraint);

            fileReader.Close();
        }
    }
}
44 changes: 20 additions & 24 deletions csharp/KeyValueMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ namespace ParquetSharp
{
internal sealed class KeyValueMetadata : IDisposable
{
public KeyValueMetadata(IReadOnlyDictionary<string, string> keyValueMetadata)
: this(Make(keyValueMetadata))
public KeyValueMetadata() : this(MakeEmpty())
{
}

Expand All @@ -21,7 +20,18 @@ public void Dispose()
Handle.Dispose();
}

public long Size => ExceptionInfo.Return<long>(Handle, KeyValueMetadata_Size);
private long Size => ExceptionInfo.Return<long>(Handle, KeyValueMetadata_Size);

/// <summary>
/// Appends every entry of <paramref name="keyValueMetadata"/> to the native key-value
/// metadata object, issuing one native append call per entry.
/// </summary>
/// <param name="keyValueMetadata">
/// Entries to append. Each key and value is converted with <c>StringUtil.ToCStringUtf8</c>
/// into a scratch buffer that is released when this method returns.
/// </param>
public void SetData(IReadOnlyDictionary<string, string> keyValueMetadata)
{
    using var byteBuffer = new ByteBuffer(1024);
    foreach (var entry in keyValueMetadata)
    {
        var keyPtr = StringUtil.ToCStringUtf8(entry.Key, byteBuffer);
        var valuePtr = StringUtil.ToCStringUtf8(entry.Value, byteBuffer);
        ExceptionInfo.Check(KeyValueMetadata_Append(Handle.IntPtr, keyPtr, valuePtr));
    }

    // Only the raw pointer is handed to native code above, so keep the managed handle
    // reachable until the last native call has returned. This assumes Handle may be
    // finalized once unreachable; the call is harmless if it is not.
    GC.KeepAlive(Handle);
}

public unsafe IReadOnlyDictionary<string, string> ToDictionary()
{
Expand Down Expand Up @@ -50,38 +60,24 @@ public unsafe IReadOnlyDictionary<string, string> ToDictionary()
}
}

private static unsafe IntPtr Make(IReadOnlyDictionary<string, string> keyValueMetadata)
private static IntPtr MakeEmpty()
{
using var byteBuffer = new ByteBuffer(1024);
var keys = new IntPtr[keyValueMetadata.Count];
var values = new IntPtr[keyValueMetadata.Count];
var i = 0;

foreach (var entry in keyValueMetadata)
{
keys[i] = StringUtil.ToCStringUtf8(entry.Key, byteBuffer);
values[i] = StringUtil.ToCStringUtf8(entry.Value, byteBuffer);

++i;
}

fixed (IntPtr* pKeys = keys)
fixed (IntPtr* pValues = values)
{
ExceptionInfo.Check(KeyValueMetadata_Make(values.Length, new IntPtr(pKeys), new IntPtr(pValues), out var handle));
return handle;
}
ExceptionInfo.Check(KeyValueMetadata_MakeEmpty(out var handle));
return handle;
}

[DllImport(ParquetDll.Name)]
private static extern IntPtr KeyValueMetadata_Make(long size, IntPtr keys, IntPtr values, out IntPtr keyValueMetadata);
private static extern IntPtr KeyValueMetadata_MakeEmpty(out IntPtr keyValueMetadata);

// Native KeyValueMetadata_Free: takes the native key-value metadata handle.
// Its native body is not visible here; presumably it releases the handle -- confirm.
[DllImport(ParquetDll.Name)]
private static extern void KeyValueMetadata_Free(IntPtr keyValueMetadata);

// Native KeyValueMetadata_Size: writes the native object's size() to `size`.
// The IntPtr result is consumed through ExceptionInfo.Return by the Size property.
[DllImport(ParquetDll.Name)]
private static extern IntPtr KeyValueMetadata_Size(IntPtr keyValueMetadata, out long size);

// Native KeyValueMetadata_Append: appends one key/value pair, passed as C string pointers,
// to the native object. The IntPtr result is the native ExceptionInfo* and is checked
// with ExceptionInfo.Check by SetData.
[DllImport(ParquetDll.Name)]
private static extern IntPtr KeyValueMetadata_Append(IntPtr keyValueMetadata, IntPtr key, IntPtr value);

// Native KeyValueMetadata_Get_Entries: outputs pointers to the native key and value
// string arrays. The IntPtr result is the native ExceptionInfo*.
// NOTE(review): ownership and lifetime of the returned arrays are not visible here -- confirm.
[DllImport(ParquetDll.Name)]
private static extern IntPtr KeyValueMetadata_Get_Entries(IntPtr keyValueMetadata, out IntPtr keys, out IntPtr values);

Expand Down
Loading