Skip to content

Commit

Permalink
Improve newline serialization in YAML
Browse files Browse the repository at this point in the history
We want to be as compact as possible to save on unnecessary tokens.
  • Loading branch information
kzu committed Jan 28, 2025
1 parent d11248b commit 71061cc
Show file tree
Hide file tree
Showing 4 changed files with 14,341 additions and 20 deletions.
71 changes: 58 additions & 13 deletions src/Tests/Misc.cs
Original file line number Diff line number Diff line change
@@ -1,37 +1,82 @@
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Threading.Tasks;
using System.Text.Json;
using System.Text.RegularExpressions;
using NuGet.Versioning;
using SharpYaml;
using Xunit;
using YamlDotNet.Serialization;

namespace Clarius.OpenLaw;

/// <summary>
/// Miscellaneous conversion helpers, run as xUnit theories.
/// </summary>
public partial class Misc
{
    /// <summary>
    /// Reads a JSON sample, turns its <c>[[p]]</c>/<c>[[/p]]</c> paragraph markers into
    /// line breaks and writes the YAML rendering to a path three levels above the
    /// current directory, using the same relative file name with a <c>.yaml</c> extension.
    /// </summary>
    /// <param name="jsonFile">Relative path of the JSON sample to convert.</param>
    [Theory]
    [InlineData(@"SaijSamples\123456789-0abc-defg-g81-87000tcanyel.json")]
    [InlineData(@"SaijSamples\123456789-0abc-defg-g56-95000scanyel.json")]
    public void ConvertJsonToYaml(string jsonFile)
    {
        var json = File.ReadAllText(jsonFile).ReplaceLineEndings();

        var options = new JsonSerializerOptions
        {
            Converters = { new DictionaryConverter() },
        };

        var dictionary = JsonSerializer.Deserialize<Dictionary<string, object?>>(json, options);

        // Deserialize returns null for a literal JSON "null"; fail with a clear message
        // instead of a NullReferenceException further down.
        Assert.NotNull(dictionary);

        EnsureMultiline(dictionary!);

        var serializer = new SerializerBuilder()
            .WithTypeConverter(new YamlDictionaryConverter())
            .WithTypeConverter(new YamlListConverter())
            .Build();

        // Serialize the dictionary to YAML
        var yaml = serializer.Serialize(dictionary);

        // Save the YAML to a file
        var yamlFile = Path.ChangeExtension(jsonFile, ".yaml");
        File.WriteAllText($@"..\..\..\{yamlFile}", yaml);
    }

    /// <summary>
    /// Walks <paramref name="model"/> and rewrites, in place, every string value that
    /// contains a <c>[[p]]</c> marker: both <c>[[p]]</c> and <c>[[/p]]</c> become line
    /// breaks, runs of three or more line breaks collapse to two, and the result is
    /// trimmed. Nested dictionaries are processed recursively; lists are scanned one
    /// level deep for dictionaries and strings.
    /// </summary>
    /// <param name="model">The dictionary to normalize; its values are replaced in place.</param>
    void EnsureMultiline(IDictionary<string, object?> model)
    {
        static string ReplaceParagraphs(string value)
        {
            var replaced = value.Replace("[[p]]", "\n").Replace("[[/p]]", "\n");
            var multiline = MultilineExpr().Replace(replaced, "\n\n");

            return multiline.Trim();
        }

        // Iterate over a snapshot of the keys: values are reassigned inside the loop, and
        // whether that invalidates a live Keys enumerator depends on the IDictionary
        // implementation.
        foreach (var key in new List<string>(model.Keys))
        {
            // Read the value once instead of indexing the dictionary per type test.
            switch (model[key])
            {
                case IDictionary<string, object?> submodel:
                    EnsureMultiline(submodel);
                    break;

                case string value when value.Contains("[[p]]"):
                    model[key] = ReplaceParagraphs(value);
                    break;

                case IList<object?> list:
                    for (var i = 0; i < list.Count; i++)
                    {
                        if (list[i] is IDictionary<string, object?> itemmodel)
                        {
                            EnsureMultiline(itemmodel);
                        }
                        else if (list[i] is string itemvalue && itemvalue.Contains("[[p]]"))
                        {
                            list[i] = ReplaceParagraphs(itemvalue);
                        }
                    }
                    break;
            }
        }
    }

    // Matches three or more consecutive line breaks (LF or CRLF).
    [GeneratedRegex(@"(\r?\n){3,}", RegexOptions.Compiled)]
    private static partial Regex MultilineExpr();
}
76 changes: 76 additions & 0 deletions src/Tests/MultilineDescriptor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
using Spectre.Console;
using YamlDotNet.Core;
using YamlDotNet.Core.Events;
using YamlDotNet.Serialization;
using YamlDotNet.Serialization.TypeInspectors;

namespace Clarius.OpenLaw;

/// <summary>
/// Write-only YAML converter for <see cref="List{T}"/> of <c>object?</c>. Emits a block
/// sequence, drops null items, and writes strings that contain a line feed as literal
/// scalars. Everything that is not a string is handed to the supplied serializer.
/// </summary>
public class YamlListConverter : IYamlTypeConverter
{
    /// <summary>Accepts any type assignable to <c>List&lt;object?&gt;</c>.</summary>
    public bool Accepts(Type type)
    {
        return typeof(List<object?>).IsAssignableFrom(type);
    }

    /// <summary>Reading is not supported by this converter.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public object? ReadYaml(IParser parser, Type type, ObjectDeserializer rootDeserializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Writes <paramref name="value"/> as a block sequence; emits nothing when it is null.
    /// </summary>
    public void WriteYaml(IEmitter emitter, object? value, Type type, ObjectSerializer serializer)
    {
        if (value is null)
        {
            return;
        }

        // Cast before emitting anything so a wrong type fails without a dangling SequenceStart.
        var items = (List<object?>)value;

        emitter.Emit(new SequenceStart(null, null, false, SequenceStyle.Block));

        foreach (var element in items)
        {
            switch (element)
            {
                case null:
                    // Null items are omitted from the output.
                    break;

                case string multiline when multiline.Contains('\n'):
                    emitter.Emit(new Scalar(null, null, multiline, ScalarStyle.Literal, true, false));
                    break;

                case string single:
                    emitter.Emit(new Scalar(single));
                    break;

                default:
                    serializer.Invoke(element);
                    break;
            }
        }

        emitter.Emit(new SequenceEnd());
    }
}

/// <summary>
/// Write-only YAML converter for <see cref="Dictionary{TKey, TValue}"/> of
/// <c>string</c> to <c>object?</c>. Emits a mapping, drops entries whose value is null,
/// and writes string values that contain a line feed as literal scalars. Values that
/// are not strings are handed to the supplied serializer.
/// </summary>
public class YamlDictionaryConverter : IYamlTypeConverter
{
    /// <summary>Accepts any type assignable to <c>Dictionary&lt;string, object?&gt;</c>.</summary>
    public bool Accepts(Type type)
    {
        return typeof(Dictionary<string, object?>).IsAssignableFrom(type);
    }

    /// <summary>Reading is not supported by this converter.</summary>
    /// <exception cref="NotImplementedException">Always thrown.</exception>
    public object? ReadYaml(IParser parser, Type type, ObjectDeserializer rootDeserializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Writes <paramref name="value"/> as a mapping; emits nothing when it is null.
    /// </summary>
    public void WriteYaml(IEmitter emitter, object? value, Type type, ObjectSerializer serializer)
    {
        if (value is null)
        {
            return;
        }

        // Cast before emitting anything so a wrong type fails without a dangling MappingStart.
        var entries = (Dictionary<string, object?>)value;

        emitter.Emit(new MappingStart());

        foreach (var entry in entries)
        {
            var content = entry.Value;
            if (content is null)
            {
                // Skip the whole entry: the key is only written once a value is known to follow.
                continue;
            }

            emitter.Emit(new Scalar(entry.Key));

            switch (content)
            {
                case string multiline when multiline.Contains('\n'):
                    emitter.Emit(new Scalar(null, null, multiline, ScalarStyle.Literal, true, false));
                    break;

                case string single:
                    emitter.Emit(new Scalar(single));
                    break;

                default:
                    serializer.Invoke(content);
                    break;
            }
        }

        emitter.Emit(new MappingEnd());
    }
}
Loading

0 comments on commit 71061cc

Please sign in to comment.