From 57c257397e7cf897eda9cc3428d4823d626eacc3 Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Thu, 24 Feb 2022 16:16:29 +1300
Subject: [PATCH 1/3] Restructure and expand documentation

---
 README.md                                 |  86 +++++---------
 RowOriented.md                            |  28 -----
 docs/PowerShell.md                        |  10 ++
 docs/Reading.md                           |  99 ++++++++++++++++
 docs/RowOriented.md                       |  53 +++++++++
 TypeFactories.md => docs/TypeFactories.md |  14 +--
 docs/Writing.md                           | 138 ++++++++++++++++++++++
 7 files changed, 338 insertions(+), 90 deletions(-)
 delete mode 100644 RowOriented.md
 create mode 100644 docs/PowerShell.md
 create mode 100644 docs/Reading.md
 create mode 100644 docs/RowOriented.md
 rename TypeFactories.md => docs/TypeFactories.md (91%)
 create mode 100644 docs/Writing.md

diff --git a/README.md b/README.md
index 5f5edfaa..ce01fc59 100644
--- a/README.md
+++ b/README.md
@@ -25,43 +25,17 @@ Supported platforms:
 | **Pre-Release Nuget** | [![NuGet latest pre-release](https://img.shields.io/nuget/vpre/ParquetSharp.svg)](https://www.nuget.org/packages/ParquetSharp/absoluteLatest) |
 | **CI Build** | [![CI Status](https://github.com/G-Research/ParquetSharp/actions/workflows/ci.yml/badge.svg?branch=master&event=push)](https://github.com/G-Research/ParquetSharp/actions/workflows/ci.yml?query=branch%3Amaster+event%3Apush) |
 
-## Examples
+## Quickstart
 
-Both examples below output a Parquet file with three columns representing a timeseries of object-value pairs ordered by datetime and object id.
+The following examples show how to write and then read a Parquet file with three columns representing a timeseries of object-value pairs.
+These use the low-level API, which is the recommended API and closely maps to the API of Apache Parquet C++.
 
-### Row-oriented API
-
-The row-oriented API offers a convenient way to abstract the column-oriented nature of Parquet files at the expense of memory, speed and flexibility. It lets one write a whole row in a single call, often resulting in more readable code.
-
-```csharp
-var timestamps = new DateTime[] { /* ... */ };
-var objectIds = new int[] { /* ... */ };
-var values = timestamps.Select(t => objectIds.Select(o => (float) rand.NextDouble()).ToArray()).ToArray();
-var columns = new[] {"Timestamp", "ObjectId", "Value"};
-
-using var rowWriter = ParquetFile.CreateRowWriter<(DateTime, int, float)>("float_timeseries.parquet", columns);
-
-for (int i = 0; i != timestamps.Length; ++i)
-{
-    for (int j = 0; j != objectIds.Length; ++j)
-    {
-        rowWriter.WriteRow((timestamps[i], objectIds[j], values[i][j]));
-    }
-}
-
-rowWriter.Close();
-```
-
-The column names can also be explicitly given, see [Row-oriented API (Advanced)](RowOriented.md) for more details.
-
-### Low-level API
-
-This closely maps to the API of Apache Parquet C++. It also provides reader and writer abstractions (`LogicalColumnReader` and `LogicalColumnWriter` respectively) to convert between .NET types and Parquet representations. This is the recommended API.
+Writing a Parquet File:
 
 ```csharp
 var timestamps = new DateTime[] { /* ... */ };
 var objectIds = new int[] { /* ... */ };
-var values = timestamps.Select(t => objectIds.Select(o => (float) rand.NextDouble()).ToArray()).ToArray();
+var values = new float[] { /* ... */ };
 
 var columns = new Column[]
 {
@@ -75,44 +49,46 @@ using var rowGroup = file.AppendRowGroup();
 
 using (var timestampWriter = rowGroup.NextColumn().LogicalWriter<DateTime>())
 {
-    for (int i = 0; i != timestamps.Length; ++i)
-    {
-        timestampWriter.WriteBatch(Enumerable.Repeat(timestamps[i], objectIds.Length).ToArray());
-    }
+    timestampWriter.WriteBatch(timestamps);
 }
-
 using (var objectIdWriter = rowGroup.NextColumn().LogicalWriter<int>())
 {
-    for (int i = 0; i != timestamps.Length; ++i)
-    {
-        objectIdWriter.WriteBatch(objectIds);
-    }
+    objectIdWriter.WriteBatch(objectIds);
 }
-
 using (var valueWriter = rowGroup.NextColumn().LogicalWriter<float>())
 {
-    for (int i = 0; i != timestamps.Length; ++i)
-    {
-        valueWriter.WriteBatch(values[i]);
-    }
+    valueWriter.WriteBatch(values);
 }
 
 file.Close();
 ```
 
-### Custom Types
+Reading the file back:
+
+```csharp
+using var file = new ParquetFileReader("float_timeseries.parquet");
+
+for (int rowGroup = 0; rowGroup < file.FileMetaData.NumRowGroups; ++rowGroup) {
+    using var rowGroupReader = file.RowGroup(rowGroup);
+    var groupNumRows = checked((int) rowGroupReader.MetaData.NumRows);
+
+    var groupTimestamps = rowGroupReader.Column(0).LogicalReader<DateTime>().ReadAll(groupNumRows);
+    var groupObjectIds = rowGroupReader.Column(1).LogicalReader<int>().ReadAll(groupNumRows);
+    var groupValues = rowGroupReader.Column(2).LogicalReader<float>().ReadAll(groupNumRows);
+}
+
+file.Close();
+```
 
-ParquetSharp allows the user to override the mapping between C# and Parquet types. Check the [Type Factories documentation](TypeFactories.md) for more information.
+## Documentation
 
-### PowerShell
+For more detailed information on how to use ParquetSharp, see the following documentation:
 
-It's possible to use ParquetSharp from PowerShell.
-You can install ParquetSharp with the [NuGet command line interface](https://docs.microsoft.com/en-us/nuget/reference/nuget-exe-cli-reference),
-then use `Add-Type` to load `ParquetSharp.dll`.
-However, you must ensure that the appropriate `ParquetSharpNative.dll` for your architecture and OS can be loaded as required,
-either by putting it somewhere in your `PATH` or in the same directory as `ParquetSharp.dll`.
-For examples of how to use ParquetSharp from PowerShell,
-see [these scripts from Apteco](https://github.com/Apteco/HelperScripts/tree/master/scripts/parquet).
+* [Writing parquet files](docs/Writing.md)
+* [Reading parquet files](docs/Reading.md)
+* [Row-oriented API](docs/RowOriented.md): a higher level API that abstracts away the column-oriented nature of Parquet files
+* [Custom types](docs/TypeFactories.md): how to override the mapping between .NET and Parquet types
+* [Use from PowerShell](docs/PowerShell.md)
 
 ## Rationale

diff --git a/RowOriented.md b/RowOriented.md
deleted file mode 100644
index 2cf5bf98..00000000
--- a/RowOriented.md
+++ /dev/null
@@ -1,28 +0,0 @@
-## Row-oriented API (Advanced)
-
-### Explicit column mapping
-
-The row-oriented API allows for specifying your own name-independent/order-independent column mapping using the optional `MapToColumn` attribute.
-
-```csharp
-struct MyRow
-{
-    [MapToColumn("ColumnA")]
-    public long MyKey;
-
-    [MapToColumn("ColumnB")]
-    public string MyValue;
-}
-
-using (var rowReader = ParquetFile.CreateRowReader<MyRow>("example.parquet"))
-{
-    for (int i = 0; i < rowReader.FileMetaData.NumRowGroups; ++i)
-    {
-        var values = rowReader.ReadRows(i);
-        foreach (MyRow r in values)
-        {
-            Console.WriteLine(r.MyKey + "/" + r.MyValue);
-        }
-    }
-}
-```
\ No newline at end of file

diff --git a/docs/PowerShell.md b/docs/PowerShell.md
new file mode 100644
index 00000000..37d8becf
--- /dev/null
+++ b/docs/PowerShell.md
@@ -0,0 +1,10 @@
+# ParquetSharp in PowerShell
+
+It's possible to use ParquetSharp from PowerShell.
+You can install ParquetSharp with the [NuGet command line interface](https://docs.microsoft.com/en-us/nuget/reference/nuget-exe-cli-reference),
+then use `Add-Type` to load `ParquetSharp.dll`.
+However, you must ensure that the appropriate `ParquetSharpNative.dll` for your architecture and OS can be loaded as required,
+either by putting it somewhere in your `PATH` or in the same directory as `ParquetSharp.dll`.
+For examples of how to use ParquetSharp from PowerShell,
+see [these scripts from Apteco](https://github.com/Apteco/HelperScripts/tree/master/scripts/parquet).
+

diff --git a/docs/Reading.md b/docs/Reading.md
new file mode 100644
index 00000000..754d2c92
--- /dev/null
+++ b/docs/Reading.md
@@ -0,0 +1,99 @@
+# Reading Parquet files
+
+The low-level ParquetSharp API provides the `ParquetFileReader` class for reading Parquet files.
+This is usually constructed from a file path, but may also be constructed from a `ManagedRandomAccessFile`,
+which wraps a .NET `System.IO.Stream` that supports seeking.
+
+```csharp
+using var fileReader = new ParquetFileReader("data.parquet");
+```
+or
+```csharp
+using var input = new ManagedRandomAccessFile(File.OpenRead("data.parquet");
+using var fileReader = new ParquetFileReader(input);
+```
+
+The `FileMetaData` property of a `ParquetFileReader` exposes information about the Parquet file and its schema:
+```csharp
+int numColumns = fileReader.FileMetaData.NumColumns;
+long numRows = fileReader.FileMetaData.NumRows;
+int numRowGroups = fileReader.FileMetaData.NumRowGroups;
+IReadOnlyDictionary<string, string> metadata = fileReader.FileMetaData.KeyValueMetadata;
+
+SchemaDescriptor schema = fileReader.FileMetaData.Schema;
+for (int columnIndex = 0; columnIndex < schema.NumColumns; ++columnIndex) {
+    ColumnDescriptor column = schema.Column(columnIndex);
+    string columnName = column.Name;
+}
+```
+
+Parquet files store data in separate row groups, which all share the same schema,
+so if you wish to read all data in a file, you generally want to loop over all of the row groups
+and create a `RowGroupReader` for each one:
+
+```csharp
+for (int rowGroup = 0; rowGroup < fileReader.FileMetaData.NumRowGroups; ++rowGroup) {
+    using var rowGroupReader = fileReader.RowGroup(rowGroup);
+    var groupNumRows = rowGroupReader.MetaData.NumRows;
+}
+```
+
+The `Column` method of `RowGroupReader` takes an integer column index and returns a `ColumnReader` object,
+which can read primitive values from the column, as well as raw definition level and repetition level data.
+Usually you will not want to use a `ColumnReader` directly, but instead call its `LogicalReader` method to
+create a `LogicalColumnReader` that can read logical values.
+There are two variations of this `LogicalReader` method; the plain `LogicalReader` method returns an abstract
+`LogicalColumnReader`, whereas the generic `LogicalReader<TElement>` method returns a typed `LogicalColumnReader<TElement>`,
+which reads values of the specified element type.
+
+If you know ahead of time the data types for the columns you will read, you can simply use the generic methods and
+read values directly. For example, to read data from the first column which represents a timestamp:
+
+```csharp
+DateTime[] timestamps = rowGroupReader.Column(0).LogicalReader<DateTime>().ReadAll(numRows);
+```
+
+However, if you don't know ahead of time the types for each column, you can implement the
+`ILogicalColumnReaderVisitor<TReturn>` interface to handle column data in a type-safe way, for example:
+
+```csharp
+sealed class ColumnPrinter : ILogicalColumnReaderVisitor<string>
+{
+    public string OnLogicalColumnReader<TValue>(LogicalColumnReader<TValue> columnReader)
+    {
+        var stringBuilder = new StringBuilder();
+        foreach (var value in columnReader) {
+            stringBuilder.Append(value?.ToString() ?? "null");
+            stringBuilder.Append(",");
+        }
+        return stringBuilder.ToString();
+    }
+}
+
+string columnValues = rowGroupReader.Column(0).LogicalReader().Apply(new ColumnPrinter());
+```
+
+There's a similar `IColumnReaderVisitor<TReturn>` interface for working with `ColumnReader` objects
+and reading physical values in a type-safe way, but most users will want to work at the logical element level.
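+
+As a hedged illustration (the `float?` element type and the column index here are assumptions for
+the sketch, not taken from any file above), nullable columns are read in the same way by using a
+nullable element type, with missing values surfacing as nulls:
+
+```csharp
+// Hypothetical: assumes column 2 of this row group is an optional (nullable) float column.
+using var nullableReader = rowGroupReader.Column(2).LogicalReader<float?>();
+float?[] nullableValues = nullableReader.ReadAll(checked((int) rowGroupReader.MetaData.NumRows));
+```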
+
+The `LogicalColumnReader` class provides multiple ways to read data.
+It implements `IEnumerable<TElement>` which internally buffers batches of data and iterates over them,
+but for more fine-grained control over reading behaviour, you can read into your own buffer. For example:
+
+```csharp
+var buffer = new TElement[4096];
+
+while (logicalColumnReader.HasNext)
+{
+    int numRead = logicalColumnReader.ReadBatch(buffer);
+
+    for (int i = 0; i != numRead; ++i)
+    {
+        TElement value = buffer[i];
+        // Use value
+    }
+}
+```
+
+The .NET type used to represent read values can optionally be overridden by using the `ColumnReader.LogicalReaderOverride<TElement>` method.
+For more details, see the [type factories documentation](TypeFactories.md).

diff --git a/docs/RowOriented.md b/docs/RowOriented.md
new file mode 100644
index 00000000..80a4e4c5
--- /dev/null
+++ b/docs/RowOriented.md
@@ -0,0 +1,53 @@
+# Row-oriented API
+
+The row-oriented API offers a convenient way to abstract the column-oriented nature of Parquet files
+at the expense of memory, speed and flexibility.
+It lets one write a whole row in a single call, often resulting in more readable code.
+
+For example, writing a file with the row-oriented API and using a tuple to represent a row of values:
+
+```csharp
+var timestamps = new DateTime[] { /* ... */ };
+var objectIds = new int[] { /* ... */ };
+var values = timestamps.Select(t => objectIds.Select(o => (float) rand.NextDouble()).ToArray()).ToArray();
+var columns = new[] {"Timestamp", "ObjectId", "Value"};
+
+using var rowWriter = ParquetFile.CreateRowWriter<(DateTime, int, float)>("float_timeseries.parquet", columns);
+
+for (int i = 0; i != timestamps.Length; ++i)
+{
+    for (int j = 0; j != objectIds.Length; ++j)
+    {
+        rowWriter.WriteRow((timestamps[i], objectIds[j], values[i][j]));
+    }
+}
+
+rowWriter.Close();
+```
+
+## Explicit column mapping
+
+The row-oriented API allows for specifying your own name-independent/order-independent column mapping using the optional `MapToColumn` attribute.
+
+```csharp
+struct MyRow
+{
+    [MapToColumn("ColumnA")]
+    public long MyKey;
+
+    [MapToColumn("ColumnB")]
+    public string MyValue;
+}
+
+using (var rowReader = ParquetFile.CreateRowReader<MyRow>("example.parquet"))
+{
+    for (int i = 0; i < rowReader.FileMetaData.NumRowGroups; ++i)
+    {
+        var values = rowReader.ReadRows(i);
+        foreach (MyRow r in values)
+        {
+            Console.WriteLine(r.MyKey + "/" + r.MyValue);
+        }
+    }
+}
+```

diff --git a/TypeFactories.md b/docs/TypeFactories.md
similarity index 91%
rename from TypeFactories.md
rename to docs/TypeFactories.md
index 4439ec25..57f20fac 100644
--- a/TypeFactories.md
+++ b/docs/TypeFactories.md
@@ -1,4 +1,4 @@
-## Type Factories
+# Type Factories
 
 ParquetSharp API exposes the logic that maps the C# types (called "logical system types" by ParquetSharp, as per Parquet's LogicalType) to the actual Parquet physical types, as well as the converters that are associated with them.
 
@@ -6,7 +6,7 @@ This means that:
 - a user can potentially read/write any type they want, as long as they provide a viable mapping,
 - a user can override the default ParquetSharp mapping and change how existing C# types are handled.
 
-### API
+## API
 
 The API at the core of this is encompassed by `LogicalTypeFactory`, `LogicalReadConverterFactory` and `LogicalWriteConverterFactory`.
@@ -16,7 +16,7 @@ On top of that, if the custom type is used for creating the schema (when writing
 In other words, the `LogicalTypeFactory` is required if the user provides a `Column` class with a custom type (writer only, the factory is needed to know the physical parquet type) or gets the `LogicalColumnReader/Writer` via the non type-overriding methods (in which case the factory is needed to know the full type of the logical column reader/writer). The corresponding converter factory is always needed.
 
-### Examples
+## Examples
 
 One of the approaches for reading custom values can be described by the following code.
 
@@ -26,16 +26,16 @@
     using var columnReader = groupReader.Column(0).LogicalReaderOverride<VolumeInDollars>();
     var values = columnReader.ReadAll(checked((int) groupReader.MetaData.NumRows));
-    
+
     /* ... */
-    
+
     [StructLayout(LayoutKind.Sequential)]
     private readonly struct VolumeInDollars
     {
        public VolumeInDollars(float value) { Value = value; }
        public readonly float Value;
     }
-    
+
     private sealed class ReadConverterFactory : LogicalReadConverterFactory
     {
        public override Delegate GetConverter<TLogical, TPhysical>(ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData)
@@ -46,4 +46,4 @@
 }
 ```
 
-But do check [TestLogicalTypeFactory.cs](csharp.test/TestLogicalTypeFactory.cs) for a more comprehensive set of examples, as there are many places that can be customized and optimized by the user.
\ No newline at end of file
+But do check [TestLogicalTypeFactory.cs](../csharp.test/TestLogicalTypeFactory.cs) for a more comprehensive set of examples, as there are many places that can be customized and optimized by the user.
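+
+As a hedged sketch of the remaining wiring (the `/* ... */` above elides this setup, and the
+property-setter style shown here is an assumption rather than code taken from the example), the
+converter factory might be attached to the reader before creating the overriding column reader:
+
+```csharp
+// Hypothetical setup: assumes ParquetFileReader exposes a settable
+// LogicalReadConverterFactory property used together with ReadConverterFactory above.
+using var fileReader = new ParquetFileReader("volumes.parquet");
+fileReader.LogicalReadConverterFactory = new ReadConverterFactory();
+
+using var groupReader = fileReader.RowGroup(0);
+using var columnReader = groupReader.Column(0).LogicalReaderOverride<VolumeInDollars>();
+var values = columnReader.ReadAll(checked((int) groupReader.MetaData.NumRows));
+```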
diff --git a/docs/Writing.md b/docs/Writing.md
new file mode 100644
index 00000000..cfc050e1
--- /dev/null
+++ b/docs/Writing.md
@@ -0,0 +1,138 @@
+# Writing Parquet files
+
+The low-level ParquetSharp API provides the `ParquetFileWriter` class for writing Parquet files.
+
+## Defining the schema
+
+When writing a Parquet file, you must define the schema up-front, which specifies all of the columns
+in the file along with their names and types.
+This schema can be defined using a graph of `ParquetSharp.Schema.Node` instances,
+starting from a root `GroupNode`,
+but ParquetSharp also provides a convenient higher level API for defining the schema as an array
+of `Column` objects.
+A `Column` can be constructed using only a name and a type parameter that is used to
+determine the logical Parquet type to write:
+
+```csharp
+var columns = new Column[]
+{
+    new Column<DateTime>("Timestamp"),
+    new Column<int>("ObjectId"),
+    new Column<float>("Value")
+};
+
+using var file = new ParquetFileWriter("float_timeseries.parquet", columns);
+```
+
+For more control over how values are represented in the Parquet file,
+you can provide the `logicalTypeOverride` parameter to the `Column` constructor which accepts a `LogicalType`.
+
+For example, you may wish to write times or timestamps with millisecond resolution rather than the default microsecond resolution:
+```csharp
+var timestampColumn = new Column<DateTime>("Timestamp", LogicalType.Timestamp(isAdjustedToUtc: true, TimeUnit.Millis);
+var timeColumn = new Column<TimeSpan>("Time", LogicalType.Time(isAdjustedToUtc: true, TimeUnit.Millis);
+```
+
+When writing decimal values, you must provide a `logicalTypeOverride` to define the precision and scale type parameters.
+Currently the precision must be 29.
+```csharp
+var decimalColumn = new Column<decimal>("Values", LogicalType.Decimal(precision: 29, scale: 3));
+```
+
+As well as defining the file schema, you may optionally provide key-value metadata that is stored in the file when creating
+a `ParquetFileWriter`:
+
+```csharp
+var metadata = new Dictionary<string, string>
+{
+    {"foo", "bar"},
+};
+using var file = new ParquetFileWriter("float_timeseries.parquet", columns, keyValueMetadata: metadata);
+```
+
+`ParquetFileWriter` constructor overloads are provided that allow specifying the type of compression to use, or for more
+fine-grained control over how files are written, you can provide a `WriterProperties` instance, which can
+be constructed with a `WriterPropertiesBuilder`.
+This allows defining the compression and encoding on a per-column basis for example, or configuring file encryption.
+
+## Writing to a stream
+
+As well as writing to a file path, ParquetSharp supports writing to a .NET `System.IO.Stream` using a `ManagedOutputStream`:
+
+```csharp
+using (var stream = new FileStream("float_timeseries.parquet", FileMode.Create))
+{
+    using var writer = new IO.ManagedOutputStream(stream);
+    using var fileWriter = new ParquetFileWriter(writer, columns);
+}
+```
+
+## Writing column data
+
+Parquet data is written in batches of column data known as row groups.
+To begin writing data, you first create a new row group:
+```csharp
+using RowGroupWriter rowGroup = file.AppendRowGroup();
+```
+
+You must then write each column's data in the order in which the columns are defined in the schema:
+
+```csharp
+using (var timestampWriter = rowGroup.NextColumn().LogicalWriter<DateTime>())
+{
+    timestampWriter.WriteBatch(timestamps);
+}
+using (var objectIdWriter = rowGroup.NextColumn().LogicalWriter<int>())
+{
+    objectIdWriter.WriteBatch(objectIds);
+}
+using (var valueWriter = rowGroup.NextColumn().LogicalWriter<float>())
+{
+    valueWriter.WriteBatch(values);
+}
+```
+
+Once all data for a row group has been written and the `RowGroupWriter` disposed,
+you may append another row group to the file and repeat the row group writing process.
+
+The `NextColumn` method of `RowGroupWriter` returns a `ColumnWriter`, which writes physical values to the file,
+and can write definition level and repetition level data to support nullable and array values.
+
+Rather than working with a `ColumnWriter` directly, it's usually more convenient to create a `LogicalColumnWriter`
+with the `ColumnWriter.LogicalWriter<TElement>` method.
+This allows writing an array or `ReadOnlySpan` of `TElement` to the column data,
+where `TElement` is the .NET type corresponding to the column's logical type.
+
+There is also a `ColumnWriter.LogicalWriterOverride<TElement>` method, which supports writing data using a different type
+to the default .NET type corresponding to the column's logical type. For more information on how to use this,
+see the [type factories documentation](TypeFactories.md).
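+
+As a hedged illustration (the nullable column here is hypothetical and not part of the schema
+defined above), writing a nullable column uses a nullable element type, and the writer emits the
+definition levels for null values automatically:
+
+```csharp
+// Hypothetical: assumes the schema declared a column as new Column<float?>("NullableValue").
+using (var nullableWriter = rowGroup.NextColumn().LogicalWriter<float?>())
+{
+    nullableWriter.WriteBatch(new float?[] {1.0f, null, 3.0f});
+}
+```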
+
+If you don't know ahead of time the column types that will be written,
+you can implement the `ILogicalColumnWriterVisitor<TReturn>` interface to handle writing data in a type-safe way:
+
+```csharp
+sealed class ExampleWriter : ILogicalColumnWriterVisitor<bool>
+{
+    public bool OnLogicalColumnWriter<TValue>(LogicalColumnWriter<TValue> columnWriter)
+    {
+        TValue[] values = GetValues<TValue>();
+        columnWriter.WriteBatch(values);
+        return true;
+    }
+}
+
+using RowGroupWriter rowGroup = file.AppendRowGroup();
+for (int columnIndex = 0; columnIndex < file.NumColumns; ++columnIndex)
+{
+    using var columnWriter = rowGroup.NextColumn();
+    using var logicalWriter = columnWriter.LogicalWriter();
+    var returnVal = logicalWriter.Apply(new ExampleWriter());
+}
+```
+
+Note that it's important to explicitly call `Close` on the `ParquetFileWriter` when writing is complete,
+as otherwise any errors encountered when writing may be silently ignored:
+
+```csharp
+file.Close();
+```

From 5b8678a044e5bad1f91263e904a20237afe29a7a Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Thu, 24 Feb 2022 17:18:14 +1300
Subject: [PATCH 2/3] Small fixes

---
 docs/Reading.md |  4 ++--
 docs/Writing.md | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/Reading.md b/docs/Reading.md
index 754d2c92..c8b2079b 100644
--- a/docs/Reading.md
+++ b/docs/Reading.md
@@ -9,7 +9,7 @@ using var fileReader = new ParquetFileReader("data.parquet");
 ```
 or
 ```csharp
-using var input = new ManagedRandomAccessFile(File.OpenRead("data.parquet");
+using var input = new ManagedRandomAccessFile(File.OpenRead("data.parquet"));
 using var fileReader = new ParquetFileReader(input);
 ```
 
@@ -34,7 +34,7 @@ and create a `RowGroupReader` for each one:
 ```csharp
 for (int rowGroup = 0; rowGroup < fileReader.FileMetaData.NumRowGroups; ++rowGroup) {
     using var rowGroupReader = fileReader.RowGroup(rowGroup);
-    var groupNumRows = rowGroupReader.MetaData.NumRows;
+    long groupNumRows = rowGroupReader.MetaData.NumRows;
 }
 ```

diff --git a/docs/Writing.md b/docs/Writing.md
index cfc050e1..ceda7a4c 100644
--- a/docs/Writing.md
+++ b/docs/Writing.md
@@ -25,12 +25,14 @@ using var file = new ParquetFileWriter("float_timeseries.parquet", columns);
 ```
 
 For more control over how values are represented in the Parquet file,
-you can provide the `logicalTypeOverride` parameter to the `Column` constructor which accepts a `LogicalType`.
+you can pass a `LogicalType` instance as the `logicalTypeOverride` parameter of the `Column` constructor.
 
 For example, you may wish to write times or timestamps with millisecond resolution rather than the default microsecond resolution:
 ```csharp
-var timestampColumn = new Column<DateTime>("Timestamp", LogicalType.Timestamp(isAdjustedToUtc: true, TimeUnit.Millis);
-var timeColumn = new Column<TimeSpan>("Time", LogicalType.Time(isAdjustedToUtc: true, TimeUnit.Millis);
+var timestampColumn = new Column<DateTime>(
+    "Timestamp", LogicalType.Timestamp(isAdjustedToUtc: true, timeUnit: TimeUnit.Millis));
+var timeColumn = new Column<TimeSpan>(
+    "Time", LogicalType.Time(isAdjustedToUtc: true, timeUnit: TimeUnit.Millis));
 ```
 
 When writing decimal values, you must provide a `logicalTypeOverride` to define the precision and scale type parameters.
@@ -101,7 +103,7 @@ and can write definition level and repetition level data to support nullable and
 Rather than working with a `ColumnWriter` directly, it's usually more convenient to create a `LogicalColumnWriter`
 with the `ColumnWriter.LogicalWriter<TElement>` method.
 This allows writing an array or `ReadOnlySpan` of `TElement` to the column data,
-where `TElement` is the .NET type corresponding to the column's logical type.
+where `TElement` is the .NET type corresponding to the column's logical element type.
 
 There is also a `ColumnWriter.LogicalWriterOverride<TElement>` method, which supports writing data using a different type
 to the default .NET type corresponding to the column's logical type.
 For more information on how to use this,

From 77104a85fae4b53a500832595d6bb648e1515bf2 Mon Sep 17 00:00:00 2001
From: Adam Reeve
Date: Mon, 7 Mar 2022 10:09:57 +1300
Subject: [PATCH 3/3] Expand on row-oriented API drawbacks

---
 docs/RowOriented.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/docs/RowOriented.md b/docs/RowOriented.md
index 80a4e4c5..e971067d 100644
--- a/docs/RowOriented.md
+++ b/docs/RowOriented.md
@@ -22,9 +22,25 @@ for (int i = 0; i != timestamps.Length; ++i)
     }
 }
 
+// Write a new row group (pretend we have new timestamps, objectIds and values)
+rowWriter.StartNewRowGroup();
+for (int i = 0; i != timestamps.Length; ++i)
+{
+    for (int j = 0; j != objectIds.Length; ++j)
+    {
+        rowWriter.WriteRow((timestamps[i], objectIds[j], values[i][j]));
+    }
+}
+
 rowWriter.Close();
 ```
 
+Internally, ParquetSharp will build up a buffer of row values and then write each column when the file
+is closed or a new row group is started.
+This means all values in a row group must be stored in memory at once,
+and the row values buffer must be resized and copied as it grows.
+Therefore, it's recommended to use the lower-level column-oriented API if performance is a concern.
+
 ## Explicit column mapping
 
 The row-oriented API allows for specifying your own name-independent/order-independent column mapping using the optional `MapToColumn` attribute.