Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the TimeOnly and DateOnly types added in .NET 6 #424

Merged
merged 3 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ For more detailed information on how to use ParquetSharp, see the following docu
* [Working with nested data](docs/Nested.md)
* [Reading and writing Arrow data](docs/Arrow.md) — how to read and write data using the [Apache Arrow format](https://arrow.apache.org/)
* [Row-oriented API](docs/RowOriented.md) — a higher level API that abstracts away the column-oriented nature of Parquet files
* [Custom types](docs/TypeFactories.md) — how to override the mapping between .NET and Parquet types
* [Custom types](docs/TypeFactories.md) — how to customize the mapping between .NET and Parquet types,
including using the `DateOnly` and `TimeOnly` types added in .NET 6.
* [Writing TimeSpan data](docs/TimeSpan.md) — interoperability with other libraries when writing TimeSpan data
* [Use from PowerShell](docs/PowerShell.md)

Expand Down
213 changes: 213 additions & 0 deletions csharp.test/TestLogicalTypeRoundtrip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,219 @@ public static void TestRoundTripBuffered(
}
}

#if NET6_0_OR_GREATER
[Test]
public static void TestRoundTripDateOnly([Values] bool useReaderOverride)
{
    // Schema with one required and one optional DateOnly column.
    var columns = new Column[]
    {
        new Column<DateOnly>("date"),
        new Column<DateOnly?>("nullable_date"),
    };

    const int numRows = 100;
    var start = new DateOnly(2024, 1, 1);
    var expectedDates = new DateOnly[numRows];
    var expectedNullableDates = new DateOnly?[numRows];
    for (var i = 0; i < numRows; ++i)
    {
        expectedDates[i] = start.AddDays(i);
        // Every fifth value (offset by one) is null to exercise definition levels.
        expectedNullableDates[i] = i % 5 == 1 ? (DateOnly?) null : start.AddDays(i);
    }

    using var buffer = new ResizableBuffer();
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var fileWriter = new ParquetFileWriter(outStream, columns);
        using var rowGroupWriter = fileWriter.AppendRowGroup();
        {
            using var writer = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>();
            writer.WriteBatch(expectedDates);
        }
        {
            using var writer = rowGroupWriter.NextColumn().LogicalWriter<DateOnly?>();
            writer.WriteBatch(expectedNullableDates);
        }
        fileWriter.Close();
    }

    DateOnly[] actualDates;
    DateOnly?[] actualNullableDates;
    using (var inStream = new BufferReader(buffer))
    {
        using var fileReader = new ParquetFileReader(inStream);
        if (!useReaderOverride)
        {
            // Configure the file-level type factory so plain LogicalReader calls
            // map Parquet date columns to DateOnly instead of the default DateTime.
            fileReader.LogicalTypeFactory = new LogicalTypeFactory
            {
                DateAsDateOnly = true,
            };
        }
        using var rowGroupReader = fileReader.RowGroup(0);
        {
            using var columnReader = rowGroupReader.Column(0);
            using var reader = useReaderOverride
                ? columnReader.LogicalReaderOverride<DateOnly>()
                : columnReader.LogicalReader<DateOnly>();
            actualDates = reader.ReadAll(numRows);
        }
        {
            using var columnReader = rowGroupReader.Column(1);
            using var reader = useReaderOverride
                ? columnReader.LogicalReaderOverride<DateOnly?>()
                : columnReader.LogicalReader<DateOnly?>();
            actualNullableDates = reader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(expectedDates, actualDates);
    Assert.AreEqual(expectedNullableDates, actualNullableDates);
}

[TestCase(null, true)]
[TestCase(TimeUnit.Micros, true)]
[TestCase(TimeUnit.Millis, true)]
[TestCase(TimeUnit.Millis, false)]
public static void TestRoundTripTimeOnly(TimeUnit? timeUnit, bool useReaderOverride)
{
    // When a time unit is specified, force it via a logical type override;
    // otherwise let the writer pick the default time representation.
    var logicalTypeOverride = timeUnit.HasValue
        ? LogicalType.Time(isAdjustedToUtc: true, timeUnit.Value)
        : null;
    var columns = new Column[]
    {
        new Column<TimeOnly>("time", logicalTypeOverride: logicalTypeOverride),
        new Column<TimeOnly?>("nullable_time", logicalTypeOverride: logicalTypeOverride),
    };

    const int numRows = 100;
    var midnight = new TimeOnly(0, 0, 0);
    var expectedTimes = new TimeOnly[numRows];
    var expectedNullableTimes = new TimeOnly?[numRows];
    for (var i = 0; i < numRows; ++i)
    {
        var value = midnight.Add(TimeSpan.FromSeconds(i));
        expectedTimes[i] = value;
        // Every fifth value (offset by one) is null to exercise definition levels.
        expectedNullableTimes[i] = i % 5 == 1 ? (TimeOnly?) null : value;
    }

    using var buffer = new ResizableBuffer();
    using (var outStream = new BufferOutputStream(buffer))
    {
        using var fileWriter = new ParquetFileWriter(outStream, columns);
        using var rowGroupWriter = fileWriter.AppendRowGroup();
        {
            using var writer = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>();
            writer.WriteBatch(expectedTimes);
        }
        {
            using var writer = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly?>();
            writer.WriteBatch(expectedNullableTimes);
        }
        fileWriter.Close();
    }

    TimeOnly[] actualTimes;
    TimeOnly?[] actualNullableTimes;
    using (var inStream = new BufferReader(buffer))
    {
        using var fileReader = new ParquetFileReader(inStream);
        if (!useReaderOverride)
        {
            // Configure the file-level type factory so plain LogicalReader calls
            // map Parquet time columns to TimeOnly instead of the default TimeSpan.
            fileReader.LogicalTypeFactory = new LogicalTypeFactory
            {
                TimeAsTimeOnly = true,
            };
        }
        using var rowGroupReader = fileReader.RowGroup(0);
        {
            using var columnReader = rowGroupReader.Column(0);
            using var reader = useReaderOverride
                ? columnReader.LogicalReaderOverride<TimeOnly>()
                : columnReader.LogicalReader<TimeOnly>();
            actualTimes = reader.ReadAll(numRows);
        }
        {
            using var columnReader = rowGroupReader.Column(1);
            using var reader = useReaderOverride
                ? columnReader.LogicalReaderOverride<TimeOnly?>()
                : columnReader.LogicalReader<TimeOnly?>();
            actualNullableTimes = reader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(expectedTimes, actualTimes);
    Assert.AreEqual(expectedNullableTimes, actualNullableTimes);
}

[Test]
[NonParallelizable]
public static void TestSetTimeOnlyAndDateOnlyOnDefaultTypeFactory()
{
    // Snapshot the process-wide defaults so they can be restored in the finally
    // block; this test mutates global state, hence [NonParallelizable].
    var defaultDateAsDateOnly = LogicalTypeFactory.Default.DateAsDateOnly;
    var defaultTimeAsTimeOnly = LogicalTypeFactory.Default.TimeAsTimeOnly;

    try
    {
        LogicalTypeFactory.Default.DateAsDateOnly = true;
        LogicalTypeFactory.Default.TimeAsTimeOnly = true;

        // Create schema directly rather than using the column abstraction,
        // to test that this uses the correct types from the type factory when writing.
        using var dateNode = new PrimitiveNode("date", Repetition.Required, LogicalType.Date(), PhysicalType.Int32);
        using var timeNode = new PrimitiveNode("time", Repetition.Required, LogicalType.Time(true, TimeUnit.Millis), PhysicalType.Int32);
        using var schemaNode = new GroupNode("schema", Repetition.Required, new[] {dateNode, timeNode});

        const int numRows = 100;
        var timeValues = Enumerable.Range(0, numRows)
            .Select(i => new TimeOnly(0, 0, 0).Add(TimeSpan.FromSeconds(i)))
            .ToArray();
        var dateValues = Enumerable.Range(0, numRows)
            .Select(i => new DateOnly(2024, 1, 1).AddDays(i))
            .ToArray();

        using var buffer = new ResizableBuffer();
        using (var outStream = new BufferOutputStream(buffer))
        {

            using var builder = new WriterPropertiesBuilder();
            using var writerProperties = builder.Build();
            using var fileWriter = new ParquetFileWriter(outStream, schemaNode, writerProperties);
            using var rowGroupWriter = fileWriter.AppendRowGroup();
            {
                // Writing with DateOnly/TimeOnly must work without any explicit
                // override because the default factory was configured above.
                using var dateWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>();
                dateWriter.WriteBatch(dateValues);
                using var timeWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>();
                timeWriter.WriteBatch(timeValues);
            }
            fileWriter.Close();
        }

        // Read back without per-reader overrides: the default factory settings
        // should map the DATE and TIME(millis) columns to DateOnly and TimeOnly.
        DateOnly[] readDateValues;
        TimeOnly[] readTimeValues;
        using (var inStream = new BufferReader(buffer))
        {
            using var fileReader = new ParquetFileReader(inStream);
            using var rowGroupReader = fileReader.RowGroup(0);
            {
                using var columnReader = rowGroupReader.Column(0);
                using var logicalReader = columnReader.LogicalReader<DateOnly>();
                readDateValues = logicalReader.ReadAll(numRows);
            }
            {
                using var columnReader = rowGroupReader.Column(1);
                using var logicalReader = columnReader.LogicalReader<TimeOnly>();
                readTimeValues = logicalReader.ReadAll(numRows);
            }
        }

        Assert.AreEqual(dateValues, readDateValues);
        Assert.AreEqual(timeValues, readTimeValues);
    }
    finally
    {
        // Always restore the global defaults so other tests are unaffected.
        LogicalTypeFactory.Default.DateAsDateOnly = defaultDateAsDateOnly;
        LogicalTypeFactory.Default.TimeAsTimeOnly = defaultTimeAsTimeOnly;
    }
}
#endif

[TestCase(DateTimeKind.Utc, TimeUnit.Micros)]
[TestCase(DateTimeKind.Utc, TimeUnit.Millis)]
[TestCase(DateTimeKind.Unspecified, TimeUnit.Micros)]
Expand Down
109 changes: 109 additions & 0 deletions csharp/LogicalRead.cs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,43 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu
return LogicalRead.GetNullableNativeConverter<TimeSpanNanos, long>();
}

#if NET6_0_OR_GREATER
if (typeof(TLogical) == typeof(DateOnly))
{
return (LogicalRead<DateOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertDateOnly(s, d));
}

if (typeof(TLogical) == typeof(DateOnly?))
{
return (LogicalRead<DateOnly?, int>.Converter) LogicalRead.ConvertDateOnly;
}

if (typeof(TLogical) == typeof(TimeOnly))
{
switch (((TimeLogicalType) logicalType).TimeUnit)
{
case TimeUnit.Millis:
return (LogicalRead<TimeOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMillis(s, d));
case TimeUnit.Micros:
return (LogicalRead<TimeOnly, long>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMicros(s, d));
}
}

if (typeof(TLogical) == typeof(TimeOnly?))
{
var timeLogicalType = (TimeLogicalType) logicalType;
var timeUnit = timeLogicalType.TimeUnit;

switch (timeUnit)
{
case TimeUnit.Millis:
return (LogicalRead<TimeOnly?, int>.Converter) LogicalRead.ConvertTimeOnlyMillis;
case TimeUnit.Micros:
return (LogicalRead<TimeOnly?, long>.Converter) LogicalRead.ConvertTimeOnlyMicros;
}
}
#endif

if (typeof(TLogical) == typeof(string))
{
var byteArrayCache = new ByteArrayReaderCache<TPhysical, TLogical>(columnChunkMetaData);
Expand Down Expand Up @@ -572,6 +609,56 @@ public static void ConvertTimeSpanMillis(ReadOnlySpan<int> source, ReadOnlySpan<
}
}

#if NET6_0_OR_GREATER
/// <summary>
/// Convert a span of Parquet date values (days since the Unix epoch)
/// into <see cref="DateOnly"/> values, one per destination slot.
/// </summary>
public static void ConvertDateOnly(ReadOnlySpan<int> source, Span<DateOnly> destination)
{
    var count = destination.Length;
    for (var idx = 0; idx != count; ++idx)
    {
        destination[idx] = ToDateOnly(source[idx]);
    }
}

/// <summary>
/// Convert Parquet date values into nullable <see cref="DateOnly"/> values.
/// Source holds only the defined (non-null) values, so it is consumed with
/// its own cursor; entries whose definition level is below
/// <paramref name="definedLevel"/> become null.
/// </summary>
public static void ConvertDateOnly(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<DateOnly?> destination, short definedLevel)
{
    var src = 0;
    for (var idx = 0; idx < destination.Length; ++idx)
    {
        destination[idx] = defLevels[idx] == definedLevel
            ? ToDateOnly(source[src++])
            : default(DateOnly?);
    }
}

/// <summary>
/// Convert a span of time-of-day values in microseconds since midnight
/// into <see cref="TimeOnly"/> values, one per destination slot.
/// </summary>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    for (var idx = 0; idx != count; ++idx)
    {
        destination[idx] = ToTimeOnlyMicros(source[idx]);
    }
}

/// <summary>
/// Convert microsecond time-of-day values into nullable <see cref="TimeOnly"/>
/// values. Source holds only the defined (non-null) values, tracked by its
/// own cursor; entries whose definition level is below
/// <paramref name="definedLevel"/> become null.
/// </summary>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    var src = 0;
    for (var idx = 0; idx < destination.Length; ++idx)
    {
        destination[idx] = defLevels[idx] == definedLevel
            ? ToTimeOnlyMicros(source[src++])
            : default(TimeOnly?);
    }
}

/// <summary>
/// Convert a span of time-of-day values in milliseconds since midnight
/// into <see cref="TimeOnly"/> values, one per destination slot.
/// </summary>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    for (var idx = 0; idx != count; ++idx)
    {
        destination[idx] = ToTimeOnlyMillis(source[idx]);
    }
}

/// <summary>
/// Convert millisecond time-of-day values into nullable <see cref="TimeOnly"/>
/// values. Source holds only the defined (non-null) values, tracked by its
/// own cursor; entries whose definition level is below
/// <paramref name="definedLevel"/> become null.
/// </summary>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    var src = 0;
    for (var idx = 0; idx < destination.Length; ++idx)
    {
        destination[idx] = defLevels[idx] == definedLevel
            ? ToTimeOnlyMillis(source[src++])
            : default(TimeOnly?);
    }
}
#endif

Comment on lines +612 to +661
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think all those methods could be internal, but given that other converters are already public we should probably maintain consistency.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm yeah the convert methods were all made public intentionally in #185. I assume the reason is that users might want to make use of these methods in their own custom converters. It does mean we have to try not to introduce breaking changes here though.

public static void ConvertString(ReadOnlySpan<ByteArray> source, ReadOnlySpan<short> defLevels, Span<string?> destination, short definedLevel, ByteArrayReaderCache<ByteArray, string> byteArrayCache)
{
for (int i = 0, src = 0; i < destination.Length; ++i)
Expand Down Expand Up @@ -737,6 +824,28 @@ public static byte[] ToByteArray(ByteArray byteArray)
return array;
}

#if NET6_0_OR_GREATER
/// <summary>
/// Convert a Parquet date value (days since the Unix epoch, 1970-01-01)
/// into a <see cref="DateOnly"/> by shifting into DateOnly's day-number space.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static DateOnly ToDateOnly(int source)
{
    var dayNumber = BaseDateOnlyNumber + source;
    return DateOnly.FromDayNumber(dayNumber);
}

/// <summary>
/// Convert a time-of-day value in microseconds since midnight into a
/// <see cref="TimeOnly"/>. A tick is 100ns, so there are 10 ticks per microsecond.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMicros(long source)
{
    const long ticksPerMicrosecond = TimeSpan.TicksPerMillisecond / 1000;
    return TimeOnly.FromTimeSpan(TimeSpan.FromTicks(ticksPerMicrosecond * source));
}

/// <summary>
/// Convert a time-of-day value in milliseconds since midnight into a
/// <see cref="TimeOnly"/>. The int * long product widens to long, so no
/// overflow occurs for any valid time of day.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMillis(int source)
{
    var ticks = TimeSpan.TicksPerMillisecond * source;
    return TimeOnly.FromTimeSpan(TimeSpan.FromTicks(ticks));
}

private static readonly int BaseDateOnlyNumber = LogicalWrite.BaseDateOnlyNumber;
#endif

public const long DateTimeOffset = LogicalWrite.DateTimeOffset;
}
}
Loading
Loading