From 227d8387c267809d599cbc8e945e31a8a97c840b Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 29 Aug 2024 06:34:55 +1200 Subject: [PATCH] Support reading and writing decimals with arbitrary length fixed-length byte array columns and writing with int32 or int64 (#482) --- csharp.test/TestDecimal.cs | 348 +++++++++++++++--- csharp.test/TestDecimal128.cs | 41 +-- csharp.test/TestLogicalTypeRoundtrip.cs | 4 +- csharp/Column.cs | 31 +- csharp/Decimal128.cs | 10 - csharp/DecimalConverter.cs | 140 +++++++ csharp/LogicalRead.cs | 28 +- csharp/LogicalTypeFactory.cs | 11 +- csharp/LogicalWrite.cs | 149 +++++++- .../PublicAPI/net471/PublicAPI.Unshipped.txt | 8 + csharp/PublicAPI/net6/PublicAPI.Unshipped.txt | 8 + .../netstandard2.1/PublicAPI.Unshipped.txt | 8 + csharp/TypeUtils.cs | 11 + 13 files changed, 669 insertions(+), 128 deletions(-) create mode 100644 csharp/DecimalConverter.cs diff --git a/csharp.test/TestDecimal.cs b/csharp.test/TestDecimal.cs index 9c718c5d..11a933e3 100644 --- a/csharp.test/TestDecimal.cs +++ b/csharp.test/TestDecimal.cs @@ -1,5 +1,7 @@ -using System.Collections.Generic; +using System; using System.Linq; +using System.Threading.Tasks; +using Apache.Arrow; using NUnit.Framework; using ParquetSharp.IO; using ParquetSharp.Schema; @@ -10,10 +12,163 @@ namespace ParquetSharp.Test internal static class TestDecimal { [Test] - public static void TestReadInt32PhysicalType() + public static unsafe void TestDecimalConverterToDecimal128RoundTrip() { - // ParquetSharp doesn't currently support writing decimal values - // with int32 physical type, so we need to define the schema manually. + const int precision = 29; + const int scale = 3; + const int typeLength = 16; + const int numRows = 1000; + var random = new Random(1); + var values = Enumerable.Range(0, numRows).Select(_ => RandomDecimal(random, scale)).ToArray(); + + using var byteBuffer = new ByteBuffer(8 * typeLength * numRows); + var converted = new ByteArray[numRows]; + + var multiplier = DecimalConverter.GetScaleMultiplier(scale, precision); + for (var i = 0; i < numRows; ++i) + { + converted[i] = byteBuffer.Allocate(typeLength); + DecimalConverter.WriteDecimal(values[i], converted[i], multiplier); + } + + var read = new decimal[numRows]; + multiplier = DecimalConverter.GetScaleMultiplier(scale, precision); + for (var i = 0; i < numRows; ++i) + { + read[i] = (*(Decimal128*) converted[i].Pointer).ToDecimal(multiplier); + } + + Assert.That(read, Is.EqualTo(values)); + } + + [Test] + public static unsafe void TestDecimal128ToDecimalConverterRoundTrip() + { + const int precision = 29; + const int scale = 3; + const int typeLength = 16; + const int numRows = 1000; + var random = new Random(2); + var values = Enumerable.Range(0, numRows).Select(_ => RandomDecimal(random, scale)).ToArray(); + + using var byteBuffer = new ByteBuffer(8 * typeLength * numRows); + var converted = new ByteArray[numRows]; + + var multiplier = DecimalConverter.GetScaleMultiplier(scale, precision); + for (var i = 0; i < numRows; ++i) + { + converted[i] = byteBuffer.Allocate(typeLength); + *(Decimal128*) converted[i].Pointer = new Decimal128(values[i], multiplier); + } + + var read = new decimal[numRows]; + multiplier = DecimalConverter.GetScaleMultiplier(scale, precision); + for (var i = 0; i < numRows; ++i) + { + read[i] = DecimalConverter.ReadDecimal(converted[i], multiplier); + } + + Assert.That(read, Is.EqualTo(values)); + } + + [TestCase(2, 0, 1)] + [TestCase(6, 0, 3)] + [TestCase(6, 2, 3)] + [TestCase(6, 2, 3)] + [TestCase(9, 8, 4)] + 
[TestCase(17, 3, 8)] + [TestCase(18, 2, 8)] + [TestCase(21, 3, 9)] + [TestCase(28, 0, 12)] + [TestCase(28, 4, 12)] + [TestCase(29, 0, 16)] // Only requires 13 bytes but we use Decimal128 for this + [TestCase(29, 5, 16)] + [TestCase(30, 6, 13)] + [TestCase(30, 28, 13)] + [TestCase(38, 27, 16)] + public static async Task TestDecimalRoundTrip(int precision, int scale, int expectedTypeLength) + { + const int rowCount = 1000; + using var decimalType = LogicalType.Decimal(precision: precision, scale: scale); + + var columns = new Column[] + { + new Column("decimals", decimalType), + new Column("nullable_decimals", decimalType), + }; + + var random = new Random(123); + var decimalValues = Enumerable.Range(0, rowCount) + .Select(_ => RandomDecimal(random, scale, precision)) + .ToArray(); + var nullableDecimalValues = Enumerable.Range(0, rowCount) + .Select(i => i % 10 == 3 ? (decimal?) null : RandomDecimal(random, scale, precision)) + .ToArray(); + + using var buffer = new ResizableBuffer(); + using (var outStream = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(outStream, columns); + using var rowGroupWriter = fileWriter.AppendRowGroup(); + + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); + columnWriter.WriteBatch(decimalValues); + + using var nullableColumnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); + nullableColumnWriter.WriteBatch(nullableDecimalValues); + + fileWriter.Close(); + } + + using (var input = new BufferReader(buffer)) + { + using var fileReader = new ParquetFileReader(input); + using var groupReader = fileReader.RowGroup(0); + + using var columnReader = groupReader.Column(0).LogicalReader(); + + Assert.That(columnReader.ColumnDescriptor.TypeLength, Is.EqualTo(expectedTypeLength)); + var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows); + Assert.That(readValues, Is.EqualTo(decimalValues)); + + using var nullableColumnReader = groupReader.Column(1).LogicalReader(); + + Assert.That(nullableColumnReader.ColumnDescriptor.TypeLength, Is.EqualTo(expectedTypeLength)); + var nullableReadValues = nullableColumnReader.ReadAll((int) groupReader.MetaData.NumRows); + Assert.That(nullableReadValues, Is.EqualTo(nullableDecimalValues)); + } + + using (var input = new BufferReader(buffer)) + { + // Verify we get the same values if using the Arrow format reader + using var fileReader = new ParquetSharp.Arrow.FileReader(input); + using var batchReader = fileReader.GetRecordBatchReader(); + while (await batchReader.ReadNextRecordBatchAsync() is { } batch) + { + using (batch) + { + var column = batch.Column(0) as Decimal128Array; + Assert.That(column, Is.Not.Null); + Assert.That(column!.NullCount, Is.Zero); + var readValues = Enumerable.Range(0, rowCount).Select(i => column.GetValue(i)!.Value).ToArray(); + + Assert.That(readValues, Is.EqualTo(decimalValues)); + + var nullableColumn = batch.Column(1) as Decimal128Array; + Assert.That(nullableColumn, Is.Not.Null); + var nullableReadValues = Enumerable.Range(0, rowCount).Select(i => nullableColumn!.GetValue(i)).ToArray(); + + Assert.That(nullableReadValues, Is.EqualTo(nullableDecimalValues)); + } + } + } + } + + [Test] + public static void TestInt32DecimalRoundTrip() + { + // The Column class doesn't support overriding the physical type, + // so we need to define the schema manually. 
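+            // (The int32 physical type stores the unscaled value, i.e. the decimal multiplied by 10^scale,
+            // directly as a 32-bit integer, so the precision is limited to 9 digits.)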
using var decimalType = LogicalType.Decimal(precision: 9, scale: 4); using var colNode = new PrimitiveNode("value", Repetition.Required, decimalType, PhysicalType.Int32); using var schema = new GroupNode("schema", Repetition.Required, new Node[] {colNode}); @@ -22,8 +177,8 @@ public static void TestReadInt32PhysicalType() .Select(i => i - 5_000) .Concat(new[] {int.MinValue, int.MinValue + 1, int.MaxValue - 1, int.MaxValue}) .ToArray(); - var scale = new decimal(10000); - var expectedValues = physicalValues.Select(v => new decimal(v) / scale).ToArray(); + var multiplier = new decimal(10000); + var decimalValues = physicalValues.Select(v => new decimal(v) / multiplier).ToArray(); using var buffer = new ResizableBuffer(); using (var outStream = new BufferOutputStream(buffer)) @@ -32,9 +187,9 @@ public static void TestReadInt32PhysicalType() using var writerProperties = propertiesBuilder.Build(); using var fileWriter = new ParquetFileWriter(outStream, schema, writerProperties); using var rowGroupWriter = fileWriter.AppendRowGroup(); - using var columnWriter = (ColumnWriter) rowGroupWriter.NextColumn(); + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); - columnWriter.WriteBatch(physicalValues); + columnWriter.WriteBatch(decimalValues); fileWriter.Close(); } @@ -46,34 +201,29 @@ public static void TestReadInt32PhysicalType() using var columnReader = groupReader.Column(0).LogicalReader(); var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows); - Assert.That(readValues, Is.EqualTo(expectedValues)); + Assert.That(readValues, Is.EqualTo(decimalValues)); } [Test] - public static void TestReadNullableDataWithInt32PhysicalType() + public static void TestNullableInt32DecimalRoundTrip() { using var decimalType = LogicalType.Decimal(precision: 9, scale: 4); using var colNode = new PrimitiveNode("value", Repetition.Optional, decimalType, PhysicalType.Int32); using var schema = new GroupNode("schema", Repetition.Required, new Node[] {colNode}); - var physicalValues = new List(); - var defLevels = new List(); - var expectedValues = new List(); - const int numValues = 10_000; + var decimalValues = new decimal?[numValues]; + for (var i = 0; i < numValues; ++i) { if (i % 10 == 0) { - defLevels.Add(0); - expectedValues.Add(null); + decimalValues[i] = null; } else { var physicalValue = i - 5_000; - physicalValues.Add(physicalValue); - defLevels.Add(1); - expectedValues.Add(new decimal(physicalValue) / 10_000); + decimalValues[i] = new decimal(physicalValue) / 10_000; } } @@ -84,9 +234,9 @@ public static void TestReadNullableDataWithInt32PhysicalType() using var writerProperties = propertiesBuilder.Build(); using var fileWriter = new ParquetFileWriter(outStream, schema, writerProperties); using var rowGroupWriter = fileWriter.AppendRowGroup(); - using var columnWriter = (ColumnWriter) rowGroupWriter.NextColumn(); + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); - columnWriter.WriteBatch(numValues, defLevels.ToArray(), null, physicalValues.ToArray()); + columnWriter.WriteBatch(decimalValues); fileWriter.Close(); } @@ -98,14 +248,14 @@ public static void TestReadNullableDataWithInt32PhysicalType() using var columnReader = groupReader.Column(0).LogicalReader(); var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows); - Assert.That(readValues, Is.EqualTo(expectedValues.ToArray())); + Assert.That(readValues, Is.EqualTo(decimalValues.ToArray())); } [Test] - public static void 
TestReadInt64PhysicalType() + public static void TestInt64DecimalRoundTrip() { - // ParquetSharp doesn't currently support writing decimal values - // with int64 physical type, so we need to define the schema manually. + // The Column class doesn't support overriding the physical type, + // so we need to define the schema manually. using var decimalType = LogicalType.Decimal(precision: 10, scale: 4); using var colNode = new PrimitiveNode("value", Repetition.Required, decimalType, PhysicalType.Int64); using var schema = new GroupNode("schema", Repetition.Required, new Node[] {colNode}); @@ -114,8 +264,8 @@ public static void TestReadInt64PhysicalType() .Select(i => (long) (i - 5_000)) .Concat(new[] {long.MinValue, long.MinValue + 1, long.MaxValue - 1, long.MaxValue}) .ToArray(); - var scale = new decimal(10000); - var expectedValues = physicalValues.Select(v => new decimal(v) / scale).ToArray(); + var multiplier = new decimal(10000); + var decimalValues = physicalValues.Select(v => new decimal(v) / multiplier).ToArray(); using var buffer = new ResizableBuffer(); using (var outStream = new BufferOutputStream(buffer)) @@ -124,9 +274,9 @@ public static void TestReadInt64PhysicalType() using var writerProperties = propertiesBuilder.Build(); using var fileWriter = new ParquetFileWriter(outStream, schema, writerProperties); using var rowGroupWriter = fileWriter.AppendRowGroup(); - using var columnWriter = (ColumnWriter) rowGroupWriter.NextColumn(); + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); - columnWriter.WriteBatch(physicalValues); + columnWriter.WriteBatch(decimalValues); fileWriter.Close(); } @@ -138,34 +288,29 @@ public static void TestReadInt64PhysicalType() using var columnReader = groupReader.Column(0).LogicalReader(); var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows); - Assert.That(readValues, Is.EqualTo(expectedValues)); + Assert.That(readValues, Is.EqualTo(decimalValues)); } [Test] - public static void TestReadNullableDataWithInt64PhysicalType() + public static void TestNullableInt64DecimalRoundTrip() { using var decimalType = LogicalType.Decimal(precision: 10, scale: 4); using var colNode = new PrimitiveNode("value", Repetition.Optional, decimalType, PhysicalType.Int64); using var schema = new GroupNode("schema", Repetition.Required, new Node[] {colNode}); - var physicalValues = new List(); - var defLevels = new List(); - var expectedValues = new List(); - const int numValues = 10_000; + var decimalValues = new decimal?[numValues]; + for (var i = 0; i < numValues; ++i) { if (i % 10 == 0) { - defLevels.Add(0); - expectedValues.Add(null); + decimalValues[i] = null; } else { var physicalValue = i - 5_000; - physicalValues.Add(physicalValue); - defLevels.Add(1); - expectedValues.Add(new decimal(physicalValue) / 10_000); + decimalValues[i] = new decimal(physicalValue) / 10_000; } } @@ -176,9 +321,9 @@ public static void TestReadNullableDataWithInt64PhysicalType() using var writerProperties = propertiesBuilder.Build(); using var fileWriter = new ParquetFileWriter(outStream, schema, writerProperties); using var rowGroupWriter = fileWriter.AppendRowGroup(); - using var columnWriter = (ColumnWriter) rowGroupWriter.NextColumn(); + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); - columnWriter.WriteBatch(numValues, defLevels.ToArray(), null, physicalValues.ToArray()); + columnWriter.WriteBatch(decimalValues); fileWriter.Close(); } @@ -190,7 +335,124 @@ public static void 
TestReadNullableDataWithInt64PhysicalType() using var columnReader = groupReader.Column(0).LogicalReader(); var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows); - Assert.That(readValues, Is.EqualTo(expectedValues.ToArray())); + Assert.That(readValues, Is.EqualTo(decimalValues.ToArray())); + } + + [Test] + public static void ThrowsWithInvalidScale() + { + Assert.Throws(() => LogicalType.Decimal(precision: 28, scale: 29)); + } + + [Test] + public static void ThrowsWithInsufficientTypeLength() + { + using var decimalType = LogicalType.Decimal(precision: 20, scale: 3); + var columns = new Column[] {new Column(typeof(decimal), "Decimal", decimalType, length: 5)}; + + using var buffer = new ResizableBuffer(); + using var outStream = new BufferOutputStream(buffer); + Assert.Throws(() => new ParquetFileWriter(outStream, columns)); + } + + [Test] + public static void WriteValueTooLargeForPrecision() + { + using var decimalType = LogicalType.Decimal(precision: 9, scale: 3); + var columns = new Column[] + { + new Column("decimals", decimalType), + }; + + var decimalValues = new[] + { + new decimal(10_000_000_000) / 1000, + }; + + using var buffer = new ResizableBuffer(); + using var outStream = new BufferOutputStream(buffer); + using var fileWriter = new ParquetFileWriter(outStream, columns); + using var rowGroupWriter = fileWriter.AppendRowGroup(); + + using var columnWriter = (LogicalColumnWriter) rowGroupWriter.NextColumn().LogicalWriter(); + Assert.Throws(() => columnWriter.WriteBatch(decimalValues)); + + fileWriter.Close(); + } + + [TestCase(30)] + [TestCase(33)] + [TestCase(38)] + public static unsafe void ReadValueTooLargeForDecimal(int precision) + { + using var decimalType = LogicalType.Decimal(precision: precision, scale: 3); + var columns = new Column[] + { + new Column("decimals", decimalType), + }; + + using var buffer = new ResizableBuffer(); + int typeLength; + using (var outStream = new BufferOutputStream(buffer)) + { + using var fileWriter = new ParquetFileWriter(outStream, columns); + using var rowGroupWriter = fileWriter.AppendRowGroup(); + + using var columnWriter = (ColumnWriter) rowGroupWriter.NextColumn(); + typeLength = columnWriter.ColumnDescriptor.TypeLength; + using var byteBuffer = new ByteBuffer(typeLength); + var byteArray = byteBuffer.Allocate(typeLength); + // Leave the most significant bit (the sign bit) as zero and set all other bits to 1 + ((byte*) byteArray.Pointer)[0] = 127; + for (int i = 1; i < typeLength; ++i) + { + ((byte*) byteArray.Pointer)[i] = 255; + } + columnWriter.WriteBatch(new[] {new FixedLenByteArray(byteArray.Pointer)}); + + fileWriter.Close(); + } + + using (var input = new BufferReader(buffer)) + { + using var fileReader = new ParquetFileReader(input); + using var groupReader = fileReader.RowGroup(0); + + using var columnReader = groupReader.Column(0).LogicalReader(); + + Assert.That(columnReader.ColumnDescriptor.TypeLength, Is.EqualTo(typeLength)); + Assert.Throws(() => columnReader.ReadAll((int) groupReader.MetaData.NumRows)); + } + } + + [Test] + public static void TestScaleMultiplier() + { + Assert.AreEqual(1M, DecimalConverter.GetScaleMultiplier(0, 29)); + Assert.AreEqual(10M, DecimalConverter.GetScaleMultiplier(1, 29)); + Assert.AreEqual(100M, DecimalConverter.GetScaleMultiplier(2, 29)); + Assert.AreEqual(1e+028M, DecimalConverter.GetScaleMultiplier(28, 29)); + } + + private static decimal RandomDecimal(Random random, int scale, int parquetPrecision = 29) + { + var low = RandomInt(random); + var mid = RandomInt(random); 
+ var high = RandomInt(random); + var negative = random.Next(0, 2) == 0; + var value = new decimal(low, mid, high, negative, (byte) scale); + if (parquetPrecision < 29) + { + value = decimal.Round(value * new decimal(Math.Pow(10, parquetPrecision - 29)), scale); + } + return value; + } + + private static int RandomInt(Random random) + { + var buffer = new byte[sizeof(int)]; + random.NextBytes(buffer); + return BitConverter.ToInt32(buffer, 0); } } } diff --git a/csharp.test/TestDecimal128.cs b/csharp.test/TestDecimal128.cs index 9bfb785e..4f3d617c 100644 --- a/csharp.test/TestDecimal128.cs +++ b/csharp.test/TestDecimal128.cs @@ -25,7 +25,7 @@ public static void TestRoundTrip(int scale) list.Add(decimal.MaxValue); - var multiplier = Decimal128.GetScaleMultiplier(scale); + var multiplier = DecimalConverter.GetScaleMultiplier(scale, precision: 29); var decimals = list.Select(v => v / multiplier).ToArray(); foreach (var value in decimals) @@ -36,15 +36,6 @@ public static void TestRoundTrip(int scale) } } - [Test] - public static void TestScaleMultiplier() - { - Assert.AreEqual(1M, Decimal128.GetScaleMultiplier(0)); - Assert.AreEqual(10M, Decimal128.GetScaleMultiplier(1)); - Assert.AreEqual(100M, Decimal128.GetScaleMultiplier(2)); - Assert.AreEqual(1e+028M, Decimal128.GetScaleMultiplier(28)); - } - [Test] [SetCulture("en-US")] public static void TestScaleOverflow() @@ -96,35 +87,5 @@ public static void TestAgainstThirdParty() var read = (decimal[]) rowGroupReader.ReadColumn(fileReader.Schema.GetDataFields()[0]).Data; Assert.AreEqual(values, read); } - - [Test] - public static void TestThrowsWithUnsupportedPrecision() - { - using var decimalType = LogicalType.Decimal(precision: 28, scale: 3); - var columns = new Column[] {new Column("Decimal", decimalType)}; - - using var buffer = new ResizableBuffer(); - using var outStream = new BufferOutputStream(buffer); - using var fileWriter = new ParquetFileWriter(outStream, columns); - using var rowGroupWriter = fileWriter.AppendRowGroup(); - var exception = Assert.Throws(() => { rowGroupWriter.NextColumn().LogicalWriter(); }); - Assert.That(exception!.Message, Does.Contain("29 digits of precision")); - fileWriter.Close(); - } - - [Test] - public static void TestThrowsWithUnsupportedLength() - { - using var decimalType = LogicalType.Decimal(precision: 29, scale: 3); - var columns = new Column[] {new Column(typeof(decimal), "Decimal", decimalType, 13)}; - - using var buffer = new ResizableBuffer(); - using var outStream = new BufferOutputStream(buffer); - using var fileWriter = new ParquetFileWriter(outStream, columns); - using var rowGroupWriter = fileWriter.AppendRowGroup(); - var exception = Assert.Throws(() => { rowGroupWriter.NextColumn().LogicalWriter(); }); - Assert.That(exception!.Message, Does.Contain("16 bytes of decimal length")); - fileWriter.Close(); - } } } diff --git a/csharp.test/TestLogicalTypeRoundtrip.cs b/csharp.test/TestLogicalTypeRoundtrip.cs index b9ba77b3..04a435dc 100644 --- a/csharp.test/TestLogicalTypeRoundtrip.cs +++ b/csharp.test/TestLogicalTypeRoundtrip.cs @@ -2076,7 +2076,7 @@ private static ExpectedColumn[] CreateExpectedColumns() Min = -10m, Max = ((NumRows - 1m) * (NumRows - 1m) * (NumRows - 1m)) / 1000 - 10, Converter = (v, descr) => LogicalRead.ToDecimal( - (FixedLenByteArray) v, Decimal128.GetScaleMultiplier(descr.TypeScale)) + (FixedLenByteArray) v, DecimalConverter.GetScaleMultiplier(descr.TypeScale, descr.TypePrecision)) }, new ExpectedColumn { @@ -2091,7 +2091,7 @@ private static ExpectedColumn[] 
CreateExpectedColumns() Min = -9.999m, Max = ((NumRows - 1m) * (NumRows - 1m) * (NumRows - 1m)) / 1000 - 10, Converter = (v, descr) => LogicalRead.ToDecimal( - (FixedLenByteArray) v, Decimal128.GetScaleMultiplier(descr.TypeScale)) + (FixedLenByteArray) v, DecimalConverter.GetScaleMultiplier(descr.TypeScale, descr.TypePrecision)) }, new ExpectedColumn { diff --git a/csharp/Column.cs b/csharp/Column.cs index 8f03e761..f90d7d06 100644 --- a/csharp/Column.cs +++ b/csharp/Column.cs @@ -13,7 +13,7 @@ public class Column #pragma warning disable RS0027 public Column(Type logicalSystemType, string name, LogicalType? logicalTypeOverride = null) - : this(logicalSystemType, name, logicalTypeOverride, GetTypeLength(logicalSystemType)) + : this(logicalSystemType, name, logicalTypeOverride, GetTypeLength(logicalSystemType, logicalTypeOverride)) { LogicalSystemType = logicalSystemType ?? throw new ArgumentNullException(nameof(logicalSystemType)); Name = name ?? throw new ArgumentNullException(nameof(name)); @@ -107,11 +107,36 @@ public static GroupNode CreateSchemaNode(Column[] columns, LogicalTypeFactory lo #pragma warning restore RS0026 - private static unsafe int GetTypeLength(Type logicalSystemType) + private static unsafe int GetTypeLength(Type logicalSystemType, LogicalType? logicalTypeOverride) { if (logicalSystemType == typeof(decimal) || logicalSystemType == typeof(decimal?)) { - return sizeof(Decimal128); + if (!(logicalTypeOverride is DecimalLogicalType decimalType)) + { + throw new ArgumentException("decimal type requires a DecimalLogicalType override"); + } + + // Older versions of ParquetSharp only supported writing with a precision of 29, + // corresponding to the maximum precision supported by C# decimal values. + // Decimals were written as 16 byte arrays and reading only supported 16 byte arrays. + // So for backwards compatibility, if the precision is 29 we still write 16 byte values. + if (decimalType.Precision == 29) + { + return sizeof(Decimal128); + } + + // For other precisions, work out the size of array required + var typeLength = 1; + while (true) + { + var maxPrecision = DecimalConverter.MaxPrecision(typeLength); + if (maxPrecision >= decimalType.Precision) + { + return typeLength; + } + + ++typeLength; + } } if (logicalSystemType == typeof(Guid) || logicalSystemType == typeof(Guid?)) diff --git a/csharp/Decimal128.cs b/csharp/Decimal128.cs index c0c902b3..5b0ed392 100644 --- a/csharp/Decimal128.cs +++ b/csharp/Decimal128.cs @@ -101,16 +101,6 @@ public decimal ToDecimal(decimal multiplier) return unscaled / multiplier; } - public static decimal GetScaleMultiplier(int scale) - { - if (scale < 0 || scale > 28) - { - throw new ArgumentOutOfRangeException(nameof(scale), "scale must be a value in [0, 28]"); - } - - return (decimal) Math.Pow(10, scale); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void TwosComplement(uint* ptr) { diff --git a/csharp/DecimalConverter.cs b/csharp/DecimalConverter.cs new file mode 100644 index 00000000..01fe6415 --- /dev/null +++ b/csharp/DecimalConverter.cs @@ -0,0 +1,140 @@ +using System; +using System.Runtime.CompilerServices; + +namespace ParquetSharp +{ + /// + /// This is a more flexible converter for decimal data stored in arbitrary length byte arrays, + /// as opposed to Decimal128 which only works with 16 byte values but is more performant. 
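+    /// This converter is used when a column's fixed-length byte array size is not 16 bytes or its precision
+    /// exceeds 29 digits (see TypeUtils.UseDecimal128); unscaled values are stored as big-endian two's complement integers.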
+ /// + internal static class DecimalConverter + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe decimal ReadDecimal(ByteArray byteArray, decimal multiplier) + { + if (byteArray.Length == 0) + { + return new decimal(0); + } + + // Read into little-Endian ordered array + var tmp = stackalloc byte[byteArray.Length]; + for (var byteIdx = 0; byteIdx < byteArray.Length; ++byteIdx) + { + tmp[byteArray.Length - byteIdx - 1] = *((byte*) byteArray.Pointer + byteIdx); + } + + var negative = false; + if ((tmp[byteArray.Length - 1] & (1 << 7)) == 1 << 7) + { + negative = true; + TwosComplement(tmp, byteArray.Length); + } + + var unscaled = new decimal(tmp[0]); + var numUsableBytes = Math.Min(byteArray.Length, 12); + decimal byteMultiplier = 1; + for (var byteIdx = 1; byteIdx < numUsableBytes; ++byteIdx) + { + byteMultiplier *= 256; + unscaled += byteMultiplier * tmp[byteIdx]; + } + + for (var byteIdx = numUsableBytes; byteIdx < byteArray.Length; ++byteIdx) + { + if (tmp[byteIdx] > 0) + { + throw new OverflowException("Decimal value is not representable as a .NET Decimal"); + } + } + + if (negative) + { + unscaled *= -1; + } + + return unscaled / multiplier; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe void WriteDecimal(decimal value, ByteArray byteArray, decimal multiplier) + { + decimal unscaled; + + try + { + unscaled = decimal.Truncate(value * multiplier); + } + catch (OverflowException exception) + { + throw new OverflowException($"value {value:E} is too large for decimal scale {Math.Log10((double) multiplier)}", exception); + } + + var negative = unscaled < 0; + if (negative) + { + unscaled *= -1; + } + + // Compute little-endian representation of unscaled value + var tmp = stackalloc byte[byteArray.Length]; + for (var byteIdx = 0; byteIdx < byteArray.Length; ++byteIdx) + { + var remainder = unscaled % 256; + tmp[byteIdx] = (byte) remainder; + unscaled = (unscaled - remainder) / 256; + } + + if (unscaled != 0) + { + throw new OverflowException( + $"value {value:E} is too large to be represented by {byteArray.Length} bytes with decimal scale {Math.Log10((double) multiplier)}"); + } + + if (negative) + { + TwosComplement(tmp, byteArray.Length); + } + + // Reverse bytes to get big-Endian representation, writing into output + for (var i = 0; i < byteArray.Length; ++i) + { + *((byte*) byteArray.Pointer + i) = tmp[byteArray.Length - i - 1]; + } + } + + public static int MaxPrecision(int typeLength) + { + return (int) Math.Floor(Math.Log10(Math.Pow(2.0, 8.0 * typeLength - 1) - 1)); + } + + public static decimal GetScaleMultiplier(int scale, int precision) + { + if (scale < 0 || scale > precision) + { + throw new ArgumentOutOfRangeException(nameof(scale), $"scale must be in the range [0, precision ({precision})]"); + } + + return (decimal) Math.Pow(10, scale); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void TwosComplement(byte* byteArray, int length) + { + byte carry = 0; + byteArray[0] = AddCarry((byte) ~byteArray[0], 1, ref carry); + for (int i = 1; i < length; ++i) + { + byteArray[i] = AddCarry((byte) ~byteArray[i], 0, ref carry); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static byte AddCarry(byte left, byte right, ref byte carry) + { + var r = (uint) left + right + carry; + carry = (byte) (r >> 8); + return (byte) r; + } + } +} diff --git a/csharp/LogicalRead.cs b/csharp/LogicalRead.cs index 0f61a0c7..d267e725 100644 --- a/csharp/LogicalRead.cs +++ 
b/csharp/LogicalRead.cs @@ -125,7 +125,7 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu if (typeof(TLogical) == typeof(decimal)) { - var multiplier = Decimal128.GetScaleMultiplier(columnDescriptor.TypeScale); + var multiplier = DecimalConverter.GetScaleMultiplier(columnDescriptor.TypeScale, columnDescriptor.TypePrecision); if (typeof(TPhysical) == typeof(int)) { return (LogicalRead.Converter) ((s, _, d, _) => LogicalRead.ConvertDecimal32(s, d, multiplier)); @@ -136,13 +136,15 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu } if (typeof(TPhysical) == typeof(FixedLenByteArray)) { - return (LogicalRead.Converter) ((s, _, d, _) => LogicalRead.ConvertDecimal128(s, d, multiplier)); + return TypeUtils.UseDecimal128(columnDescriptor) + ? (LogicalRead.Converter) ((s, _, d, _) => LogicalRead.ConvertDecimal128(s, d, multiplier)) + : (LogicalRead.Converter) ((s, _, d, _) => LogicalRead.ConvertDecimal(s, d, multiplier, columnDescriptor.TypeLength)); } } if (typeof(TLogical) == typeof(decimal?)) { - var multiplier = Decimal128.GetScaleMultiplier(columnDescriptor.TypeScale); + var multiplier = DecimalConverter.GetScaleMultiplier(columnDescriptor.TypeScale, columnDescriptor.TypePrecision); if (typeof(TPhysical) == typeof(int)) { return (LogicalRead.Converter) ((s, dl, d, del) => LogicalRead.ConvertDecimal32(s, dl, d, multiplier, del)); @@ -153,7 +155,9 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu } if (typeof(TPhysical) == typeof(FixedLenByteArray)) { - return (LogicalRead.Converter) ((s, dl, d, del) => LogicalRead.ConvertDecimal128(s, dl, d, multiplier, del)); + return TypeUtils.UseDecimal128(columnDescriptor) + ? (LogicalRead.Converter) ((s, dl, d, del) => LogicalRead.ConvertDecimal128(s, dl, d, multiplier, del)) + : (LogicalRead.Converter) ((s, dl, d, del) => LogicalRead.ConvertDecimal(s, dl, d, multiplier, columnDescriptor.TypeLength, del)); } } @@ -511,6 +515,22 @@ public static void ConvertDecimal128(ReadOnlySpan source, Rea } } + public static void ConvertDecimal(ReadOnlySpan source, Span destination, decimal multiplier, int typeLength) + { + for (int i = 0; i < destination.Length; ++i) + { + destination[i] = DecimalConverter.ReadDecimal(new ByteArray(source[i].Pointer, typeLength), multiplier); + } + } + + public static void ConvertDecimal(ReadOnlySpan source, ReadOnlySpan defLevels, Span destination, decimal multiplier, int typeLength, short definedLevel) + { + for (int i = 0, src = 0; i < destination.Length; ++i) + { + destination[i] = defLevels[i] != definedLevel ? default(decimal?) 
: DecimalConverter.ReadDecimal(new ByteArray(source[src++].Pointer, typeLength), multiplier); + } + } + public static void ConvertUuid(ReadOnlySpan source, Span destination) { for (int i = 0; i < destination.Length; ++i) diff --git a/csharp/LogicalTypeFactory.cs b/csharp/LogicalTypeFactory.cs index 33c9f5d1..af717e61 100644 --- a/csharp/LogicalTypeFactory.cs +++ b/csharp/LogicalTypeFactory.cs @@ -145,13 +145,12 @@ public virtual unsafe (Type physicalType, Type logicalType) GetSystemTypes(Colum } case PhysicalType.FixedLenByteArray: { - if (descriptor.TypeLength != sizeof(Decimal128)) + var maxPrecision = DecimalConverter.MaxPrecision(descriptor.TypeLength); + if (descriptor.TypePrecision > maxPrecision) { - throw new NotSupportedException($"only {sizeof(Decimal128)} bytes of decimal length is supported with fixed-length byte array data"); - } - if (descriptor.TypePrecision > 29) - { - throw new NotSupportedException("only max 29 digits of decimal precision is supported with fixed-length byte array data"); + throw new NotSupportedException( + $"A maximum of {maxPrecision} digits of decimal precision is supported with fixed length byte arrays " + + $"of length {descriptor.TypeLength} (specified precision is {descriptor.TypePrecision})"); } return (typeof(FixedLenByteArray), nullable ? typeof(decimal?) : typeof(decimal)); } diff --git a/csharp/LogicalWrite.cs b/csharp/LogicalWrite.cs index 52fca6f0..3e6190a9 100644 --- a/csharp/LogicalWrite.cs +++ b/csharp/LogicalWrite.cs @@ -98,17 +98,53 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ByteBuffe if (typeof(TLogical) == typeof(decimal)) { ValidateDecimalColumn(columnDescriptor); - if (byteBuffer == null) throw new ArgumentNullException(nameof(byteBuffer)); - var multiplier = Decimal128.GetScaleMultiplier(columnDescriptor.TypeScale); - return (LogicalWrite.Converter) ((s, _, d, _) => LogicalWrite.ConvertDecimal128(s, d, multiplier, byteBuffer)); + var multiplier = DecimalConverter.GetScaleMultiplier(columnDescriptor.TypeScale, columnDescriptor.TypePrecision); + if (typeof(TPhysical) == typeof(FixedLenByteArray)) + { + if (byteBuffer == null) + { + throw new ArgumentNullException(nameof(byteBuffer)); + } + return TypeUtils.UseDecimal128(columnDescriptor) + ? 
(LogicalWrite.Converter) ((s, _, d, _) => LogicalWrite.ConvertDecimal128(s, d, multiplier, byteBuffer)) + : (LogicalWrite.Converter) ((s, _, d, _) => LogicalWrite.ConvertDecimal(s, d, multiplier, byteBuffer, columnDescriptor.TypeLength)); + } + if (typeof(TPhysical) == typeof(int)) + { + return (LogicalWrite.Converter) ((s, _, d, _) => LogicalWrite.ConvertDecimal(s, d, multiplier)); + } + if (typeof(TPhysical) == typeof(long)) + { + return (LogicalWrite.Converter) ((s, _, d, _) => LogicalWrite.ConvertDecimal(s, d, multiplier)); + } + + throw new NotSupportedException("Writing decimal data is only supported with fixed-length byte array, int32, and int64 physical types"); } if (typeof(TLogical) == typeof(decimal?)) { ValidateDecimalColumn(columnDescriptor); - if (byteBuffer == null) throw new ArgumentNullException(nameof(byteBuffer)); - var multiplier = Decimal128.GetScaleMultiplier(columnDescriptor.TypeScale); - return (LogicalWrite.Converter) ((s, dl, d, nl) => LogicalWrite.ConvertDecimal128(s, dl, d, multiplier, nl, byteBuffer)); + var multiplier = DecimalConverter.GetScaleMultiplier(columnDescriptor.TypeScale, columnDescriptor.TypePrecision); + if (typeof(TPhysical) == typeof(FixedLenByteArray)) + { + if (byteBuffer == null) + { + throw new ArgumentNullException(nameof(byteBuffer)); + } + return TypeUtils.UseDecimal128(columnDescriptor) + ? (LogicalWrite.Converter) ((s, dl, d, nl) => LogicalWrite.ConvertDecimal128(s, dl, d, multiplier, nl, byteBuffer)) + : (LogicalWrite.Converter) ((s, dl, d, nl) => LogicalWrite.ConvertDecimal(s, dl, d, multiplier, nl, byteBuffer, columnDescriptor.TypeLength)); + } + if (typeof(TPhysical) == typeof(int)) + { + return (LogicalWrite.Converter) ((s, dl, d, nl) => LogicalWrite.ConvertDecimal(s, dl, d, multiplier, nl)); + } + if (typeof(TPhysical) == typeof(long)) + { + return (LogicalWrite.Converter) ((s, dl, d, nl) => LogicalWrite.ConvertDecimal(s, dl, d, multiplier, nl)); + } + + throw new NotSupportedException("Writing decimal data is only supported with fixed-length byte array, int32, and int64 physical types"); } if (typeof(TLogical) == typeof(Guid)) @@ -264,20 +300,14 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ByteBuffe private static unsafe void ValidateDecimalColumn(ColumnDescriptor columnDescriptor) { - // For the moment we only support serializing decimal to Decimal128. - // This reflects the C# decimal structure with 28-29 digits precision. - // Will implement 32-bits, 64-bits and other precision later. - if (typeof(TPhysical) != typeof(FixedLenByteArray)) - { - throw new NotSupportedException("Writing decimal data is only supported with a fixed-length byte array physical type"); - } - if (columnDescriptor.TypePrecision != 29) - { - throw new NotSupportedException("only 29 digits of precision is currently supported for decimal type"); - } - if (columnDescriptor.TypeLength != sizeof(Decimal128)) + var typeLength = typeof(TPhysical) == typeof(FixedLenByteArray) ? 
columnDescriptor.TypeLength : sizeof(TPhysical); + + var maxPrecision = DecimalConverter.MaxPrecision(typeLength); + if (columnDescriptor.TypePrecision > maxPrecision) { - throw new NotSupportedException("only 16 bytes of length is currently supported for decimal type "); + throw new NotSupportedException( + $"A maximum of {maxPrecision} digits of decimal precision is supported with a type length of {typeLength} " + + $"(requested precision is {columnDescriptor.TypePrecision})"); } } } @@ -449,6 +479,85 @@ public static void ConvertDecimal128(ReadOnlySpan source, Span } } + public static void ConvertDecimal(ReadOnlySpan source, Span destination, decimal multiplier, ByteBuffer byteBuffer, int typeLength) + { + for (int i = 0; i < source.Length; ++i) + { + var byteArray = byteBuffer.Allocate(typeLength); + DecimalConverter.WriteDecimal(source[i], byteArray, multiplier); + destination[i] = new FixedLenByteArray(byteArray.Pointer); + } + } + + public static void ConvertDecimal(ReadOnlySpan source, Span defLevels, Span destination, decimal multiplier, short nullLevel, ByteBuffer byteBuffer, int typeLength) + { + for (int i = 0, dst = 0; i < source.Length; ++i) + { + var value = source[i]; + if (value == null) + { + defLevels[i] = nullLevel; + } + else + { + var byteArray = byteBuffer.Allocate(typeLength); + DecimalConverter.WriteDecimal(value.Value, byteArray, multiplier); + destination[dst++] = new FixedLenByteArray(byteArray.Pointer); + defLevels[i] = (short) (nullLevel + 1); + } + } + } + + public static void ConvertDecimal(ReadOnlySpan source, Span destination, decimal multiplier) + { + for (int i = 0; i < source.Length; ++i) + { + destination[i] = (int) (source[i] * multiplier); + } + } + + public static void ConvertDecimal(ReadOnlySpan source, Span defLevels, Span destination, decimal multiplier, short nullLevel) + { + for (int i = 0, dst = 0; i < source.Length; ++i) + { + var value = source[i]; + if (value == null) + { + defLevels[i] = nullLevel; + } + else + { + destination[dst++] = (int) (value.Value * multiplier); + defLevels[i] = (short) (nullLevel + 1); + } + } + } + + public static void ConvertDecimal(ReadOnlySpan source, Span destination, decimal multiplier) + { + for (int i = 0; i < source.Length; ++i) + { + destination[i] = (long) (source[i] * multiplier); + } + } + + public static void ConvertDecimal(ReadOnlySpan source, Span defLevels, Span destination, decimal multiplier, short nullLevel) + { + for (int i = 0, dst = 0; i < source.Length; ++i) + { + var value = source[i]; + if (value == null) + { + defLevels[i] = nullLevel; + } + else + { + destination[dst++] = (long) (value.Value * multiplier); + defLevels[i] = (short) (nullLevel + 1); + } + } + } + public static void ConvertUuid(ReadOnlySpan source, Span destination, ByteBuffer byteBuffer) { for (int i = 0; i < source.Length; ++i) @@ -752,7 +861,7 @@ public static unsafe FixedLenByteArray FromUuid(Guid uuid, ByteBuffer byteBuffer // The value is encoded using big-endian, so that 00112233-4455-6677-8899-aabbccddeeff is encoded // as the bytes 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff. // - // But Guid endianess is platform dependent (and ToByteArray() uses a little endian representation). + // But Guid endianness is platform dependent (and ToByteArray() uses a little endian representation). 
             if (BitConverter.IsLittleEndian)
             {
                 // ReSharper disable once PossibleNullReferenceException
diff --git a/csharp/PublicAPI/net471/PublicAPI.Unshipped.txt b/csharp/PublicAPI/net471/PublicAPI.Unshipped.txt
index 7dc5c581..c73faa38 100644
--- a/csharp/PublicAPI/net471/PublicAPI.Unshipped.txt
+++ b/csharp/PublicAPI/net471/PublicAPI.Unshipped.txt
@@ -1 +1,9 @@
 #nullable enable
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.ReadOnlySpan defLevels, System.Span destination, decimal multiplier, int typeLength, short definedLevel) -> void
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
diff --git a/csharp/PublicAPI/net6/PublicAPI.Unshipped.txt b/csharp/PublicAPI/net6/PublicAPI.Unshipped.txt
index 7dc5c581..c73faa38 100644
--- a/csharp/PublicAPI/net6/PublicAPI.Unshipped.txt
+++ b/csharp/PublicAPI/net6/PublicAPI.Unshipped.txt
@@ -1 +1,9 @@
 #nullable enable
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.ReadOnlySpan defLevels, System.Span destination, decimal multiplier, int typeLength, short definedLevel) -> void
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
diff --git a/csharp/PublicAPI/netstandard2.1/PublicAPI.Unshipped.txt b/csharp/PublicAPI/netstandard2.1/PublicAPI.Unshipped.txt
index 7dc5c581..c73faa38 100644
--- a/csharp/PublicAPI/netstandard2.1/PublicAPI.Unshipped.txt
+++ b/csharp/PublicAPI/netstandard2.1/PublicAPI.Unshipped.txt
@@ -1 +1,9 @@
 #nullable enable
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.ReadOnlySpan defLevels, System.Span destination, decimal multiplier, int typeLength, short definedLevel) -> void
+static ParquetSharp.LogicalRead.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span destination, decimal multiplier, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel) -> void
+static ParquetSharp.LogicalWrite.ConvertDecimal(System.ReadOnlySpan source, System.Span defLevels, System.Span destination, decimal multiplier, short nullLevel, ParquetSharp.ByteBuffer! byteBuffer, int typeLength) -> void
diff --git a/csharp/TypeUtils.cs b/csharp/TypeUtils.cs
index 9878412c..67922c86 100644
--- a/csharp/TypeUtils.cs
+++ b/csharp/TypeUtils.cs
@@ -37,5 +37,16 @@ public static bool IsNullableNested(Type type, out Type inner)
             inner = null!;
             return false;
         }
+
+        /// <summary>
+        /// Whether to use the <see cref="Decimal128"/> type for conversion between decimal values
+        /// and fixed length byte array data.
+        /// </summary>
+        public static unsafe bool UseDecimal128(ColumnDescriptor columnDescriptor)
+        {
+            // Even if the type length matches Decimal128, we want to use DecimalConverter for higher
+            // precision than decimal supports so that we check for overflow and don't silently read invalid data.
+            return columnDescriptor.TypeLength == sizeof(Decimal128) && columnDescriptor.TypePrecision <= 29;
+        }
     }
 }
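
The following sketch is distilled from the round-trip tests above and shows how the new decimal support can be used. It is a minimal example: the column names, precisions and values are illustrative only, and the generic Column<decimal>, LogicalWriter<decimal> and LogicalReader<decimal> overloads are assumed from the wider ParquetSharp API rather than taken from this diff.

using System.Linq;
using ParquetSharp;
using ParquetSharp.IO;
using ParquetSharp.Schema;

internal static class DecimalUsageExample
{
    public static void Main()
    {
        var values = Enumerable.Range(0, 100).Select(i => i / 1000m).ToArray();

        using var buffer = new ResizableBuffer();

        // Writing a high-precision decimal column: the Column-based API now derives the
        // fixed-length byte array size from the precision (precision 38 uses 16 bytes,
        // while e.g. precision 18 only requires 8 bytes, per the round-trip test cases).
        using (var outStream = new BufferOutputStream(buffer))
        {
            using var decimalType = LogicalType.Decimal(precision: 38, scale: 3);
            var columns = new Column[] {new Column<decimal>("decimals", decimalType)};

            using var fileWriter = new ParquetFileWriter(outStream, columns);
            using var rowGroupWriter = fileWriter.AppendRowGroup();
            using var columnWriter = rowGroupWriter.NextColumn().LogicalWriter<decimal>();
            columnWriter.WriteBatch(values);
            fileWriter.Close();
        }

        // Reading back as decimal works for any fixed-length byte array size,
        // as long as the stored values are representable as a .NET decimal.
        using (var input = new BufferReader(buffer))
        {
            using var fileReader = new ParquetFileReader(input);
            using var groupReader = fileReader.RowGroup(0);
            using var columnReader = groupReader.Column(0).LogicalReader<decimal>();
            var readValues = columnReader.ReadAll((int) groupReader.MetaData.NumRows);
            // readValues now contains the original decimal values.
        }

        // Writing decimals with an int32 (or int64) physical type still requires defining
        // the schema manually, since the Column class cannot override the physical type.
        using var int32Buffer = new ResizableBuffer();
        using (var outStream = new BufferOutputStream(int32Buffer))
        {
            using var decimalType = LogicalType.Decimal(precision: 9, scale: 3);
            using var colNode = new PrimitiveNode("value", Repetition.Required, decimalType, PhysicalType.Int32);
            using var schema = new GroupNode("schema", Repetition.Required, new Node[] {colNode});
            using var propertiesBuilder = new WriterPropertiesBuilder();
            using var writerProperties = propertiesBuilder.Build();

            using var fileWriter = new ParquetFileWriter(outStream, schema, writerProperties);
            using var rowGroupWriter = fileWriter.AppendRowGroup();
            using var columnWriter = (LogicalColumnWriter<decimal>) rowGroupWriter.NextColumn().LogicalWriter();
            columnWriter.WriteBatch(values);
            fileWriter.Close();
        }
    }
}

For precision 29 the writer keeps the previous 16 byte Decimal128 layout for backwards compatibility. Values that do not fit the declared precision on write, or that cannot be represented as a .NET decimal on read, now raise an OverflowException rather than being read or written as invalid data.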