/*
 * Copyright 2020 James Courtney
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using FlatSharp.Internal;

namespace Samples.SharedStrings;

/// <summary>
/// Demonstrates FlatSharp's automatic string deduplication ("shared strings"). The sample builds a
/// collection of rows in which every cell is a (column name, value) pair and serializes it three ways:
/// without shared strings, with the default shared string writer, and with a custom writer. Sharing the
/// column names means a name does not have to be serialized again for every cell that uses it.
/// </summary>
public class SharedStringsExample : IFlatSharpSample
{
    /// <summary>
    /// Serializes one database with three differently-configured serializers and prints the number
    /// of bytes each of them wrote to the console.
    /// </summary>
    public void Run()
    {
        // 10000 rows, each one produced by CreateRow from its index.
        var database = new Database
        {
            Rows = Enumerable.Range(0, 10000).Select(CreateRow).ToArray(),
        };

        // The stock serializer: shared strings are enabled by default.
        ISerializer<Database> sharedSerializer = Database.Serializer;

        // A serializer derived from the stock one with shared strings switched off.
        // The delegate passed to WithSettings configures the writer.
        ISerializer<Database> unsharedSerializer = Database.Serializer.WithSettings(
            settings => settings.DisableSharedStrings());

        // A serializer derived from the stock one that uses our own shared string writer
        // (PerfectSharedStringWriter, declared further down in this file).
        ISerializer<Database> customSerializer = Database.Serializer.WithSettings(
            settings => settings.UseSharedStringWriter(() => new PerfectSharedStringWriter()));

        // Each serializer reports its own worst-case size, so each gets its own buffer.
        var unsharedBuffer = new byte[unsharedSerializer.GetMaxSize(database)];
        var sharedBuffer = new byte[sharedSerializer.GetMaxSize(database)];
        var customBuffer = new byte[customSerializer.GetMaxSize(database)];

        int unsharedSize = unsharedSerializer.Write(unsharedBuffer, database);
        int sharedSize = sharedSerializer.Write(sharedBuffer, database);
        int customSize = customSerializer.Write(customBuffer, database);

        Console.WriteLine($"Serialized size without shared strings: {unsharedSize}");

        // With this few distinct shared strings the two sizes below are expected to match.
        // With many distinct strings the custom writer should produce smaller output,
        // at the cost of being considerably slower.
        Console.WriteLine($"Serialized size with shared strings: {sharedSize}");
        Console.WriteLine($"Serialized size with custom shared strings: {customSize}");
    }

    /// <summary>
    /// Creates a row holding three columns. The column names are "Column" followed by
    /// <c>row % 500</c>, <c>(row + 1) % 500</c> and <c>(row + 2) % 500</c> respectively, so the
    /// same names recur across rows. Each value is a newly generated GUID string.
    /// </summary>
    /// <param name="row">The index of the row; it determines the three column names.</param>
    /// <returns>A new <see cref="Row"/> with exactly three columns.</returns>
    public static Row CreateRow(int row)
    {
        // Builds one cell: a name drawn from the pool of 500 column names plus a random value.
        static Column MakeColumn(int index) => new Column
        {
            ColumnName = "Column" + (index % 500),
            Value = Guid.NewGuid().ToString(),
        };

        return new Row
        {
            Values = new Column[]
            {
                MakeColumn(row),
                MakeColumn(row + 1),
                MakeColumn(row + 2),
            },
        };
    }
}

/// <summary>
/// This is a "perfect" shared string writer implementation, which guarantees that a single string is written only once.
/// This class will give optimal compression results, but will be considerably slower than FlatSharp's default implementation,
/// which uses a hashtable with flush-on-evict semantics and may write shared strings more than once.
/// </summary>
public sealed class PerfectSharedStringWriter : ISharedStringWriter
{
    // Maps each distinct string seen so far to every buffer offset that must be patched
    // to point at that string once it has been written.
    private readonly Dictionary<string, List<int>> stringOffsetMap = new Dictionary<string, List<int>>();

    /// <summary>
    /// Must be true if there are any strings waiting to be flushed. Here that means the map
    /// still holds at least one string that has been recorded but not yet written.
    /// </summary>
    public bool IsDirty => this.stringOffsetMap.Count > 0;

    /// <summary>
    /// Called when FlatSharp has finished a serialize operation. This is the signal to flush any strings that the
    /// string writer is hanging onto. Every pending string is written once, every recorded offset is pointed at
    /// it, and the pending set is then emptied so that <see cref="IsDirty"/> becomes false.
    /// </summary>
    /// <param name="writer">The span writer used to write the strings and patch the offsets.</param>
    /// <param name="data">The destination buffer.</param>
    /// <param name="context">The serialization context of the current write operation.</param>
    public void FlushWrites<TSpanWriter>(TSpanWriter writer, Span<byte> data, SerializationContext context) where TSpanWriter : ISpanWriter
    {
        foreach (KeyValuePair<string, List<int>> pending in this.stringOffsetMap)
        {
            // Write the string itself exactly once.
            int stringOffset = writer.WriteAndProvisionString(data, pending.Key, context);

            // Update all the pointers that need to point to that string.
            foreach (int offset in pending.Value)
            {
                writer.WriteUOffset(data, offset, stringOffset);
            }
        }

        // Nothing is waiting to be flushed any more. Without this, IsDirty would keep reporting
        // true after a flush and a second flush would write every string again.
        this.stringOffsetMap.Clear();
    }

    /// <summary>
    /// Prepares to write. In this case, we just need to clear the internal map for a new write operation,
    /// since the same SharedStringWriter is reused.
    /// </summary>
    public void Reset()
    {
        this.stringOffsetMap.Clear();
    }

    /// <summary>
    /// Writes a shared string by storing the string mapped to the offsets at which the string occurs in the buffer.
    /// Nothing is written to the buffer here; the actual write is deferred until <see cref="FlushWrites"/>.
    /// </summary>
    /// <param name="spanWriter">The span writer (unused by this implementation).</param>
    /// <param name="data">The destination buffer (unused by this implementation).</param>
    /// <param name="offset">The offset in the buffer that must later point at the string.</param>
    /// <param name="value">The string to share.</param>
    /// <param name="context">The serialization context (unused by this implementation).</param>
    public void WriteSharedString<TSpanWriter>(TSpanWriter spanWriter, Span<byte> data, int offset, string value, SerializationContext context)
        where TSpanWriter : ISpanWriter
    {
        if (!this.stringOffsetMap.TryGetValue(value, out List<int>? offsets))
        {
            // First occurrence of this string: start a new list of offsets for it.
            offsets = new List<int>();
            this.stringOffsetMap[value] = offsets;
        }

        offsets.Add(offset);
    }
}