From 77104a85fae4b53a500832595d6bb648e1515bf2 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 7 Mar 2022 10:09:57 +1300 Subject: [PATCH] Expand on row-oriented API drawbacks --- docs/RowOriented.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/RowOriented.md b/docs/RowOriented.md index 80a4e4c5..e971067d 100644 --- a/docs/RowOriented.md +++ b/docs/RowOriented.md @@ -22,9 +22,25 @@ for (int i = 0; i != timestamps.Length; ++i) } } +// Write a new row group (pretend we have new timestamps, objectIds and values) +rowWriter.StartNewRowGroup(); +for (int i = 0; i != timestamps.Length; ++i) +{ + for (int j = 0; j != objectIds.Length; ++j) + { + rowWriter.WriteRow((timestamps[i], objectIds[j], values[i][j])); + } +} + rowWriter.Close(); ``` +Internally, ParquetSharp will build up a buffer of row values and then write each column when the file +is closed or a new row group is started. +This means all values in a row group must be stored in memory at once, +and the row values buffer must be resized and copied as it grows. +Therefore, it's recommended to use the lower-level column-oriented API if performance is a concern. + ## Explicit column mapping The row-oriented API allows for specifying your own name-independent/order-independent column mapping using the optional `MapToColumn` attribute.