-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve newline serialization in YAML
We want to be as compact as possible to save on unnecessary tokens.
- Loading branch information
Showing
4 changed files
with
14,341 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,82 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.ComponentModel.DataAnnotations; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Text.Json; | ||
using System.Text.Json.Nodes; | ||
using System.Threading.Tasks; | ||
using System.Text.Json; | ||
using System.Text.RegularExpressions; | ||
using NuGet.Versioning; | ||
using SharpYaml; | ||
using Xunit; | ||
using YamlDotNet.Serialization; | ||
|
||
namespace Clarius.OpenLaw; | ||
|
||
/// <summary>
/// Ad-hoc conversion helpers, written as xUnit theories, that turn JSON samples into YAML.
/// </summary>
public partial class Misc
{
    /// <summary>
    /// Reads <paramref name="jsonFile"/>, rewrites paragraph markers in its string values as
    /// line breaks, and writes the result as YAML to a file with the same relative path and a
    /// <c>.yaml</c> extension, resolved three directories above the current directory.
    /// </summary>
    /// <param name="jsonFile">Relative path of the JSON sample to convert.</param>
    [Theory]
    [InlineData(@"SaijSamples\123456789-0abc-defg-g81-87000tcanyel.json")]
    [InlineData(@"SaijSamples\123456789-0abc-defg-g56-95000scanyel.json")]
    public void ConvertJsonToYaml(string jsonFile)
    {
        var json = File.ReadAllText(jsonFile).ReplaceLineEndings();

        var options = new JsonSerializerOptions
        {
            Converters = { new DictionaryConverter() },
        };

        var dictionary = JsonSerializer.Deserialize<Dictionary<string, object?>>(json, options);

        EnsureMultiline(dictionary!);

        var serializer = new SerializerBuilder()
            .WithTypeConverter(new YamlDictionaryConverter())
            .WithTypeConverter(new YamlListConverter())
            .Build();

        // Serialize the dictionary to YAML
        var yaml = serializer.Serialize(dictionary);

        // Save the YAML to a file
        var yamlFile = Path.ChangeExtension(jsonFile, ".yaml");
        File.WriteAllText($@"..\..\..\{yamlFile}", yaml);
    }

    /// <summary>
    /// Walks <paramref name="model"/> in place and rewrites every string containing
    /// <c>[[p]]</c> so that paragraph markers become line breaks. Nested dictionaries are
    /// processed recursively; lists are scanned one level deep for dictionaries and strings.
    /// </summary>
    /// <param name="model">Dictionary whose values are updated in place.</param>
    void EnsureMultiline(IDictionary<string, object> model)
    {
        // Turns "[[p]]" and "[[/p]]" into line feeds, collapses runs of three or more line
        // breaks into exactly two line feeds, and trims surrounding whitespace.
        static string ReplaceParagraphs(string value)
        {
            var replaced = value.Replace("[[p]]", "\n").Replace("[[/p]]", "\n");
            var multiline = MultilineExpr().Replace(replaced, "\n\n");

            return multiline.Trim();
        }

        // Iterate over a copy of the keys: entries are reassigned inside the loop, and an
        // IDictionary implementation may invalidate a live key enumeration when that happens.
        foreach (var key in new List<string>(model.Keys))
        {
            switch (model[key])
            {
                case IDictionary<string, object> submodel:
                    EnsureMultiline(submodel);
                    break;

                case string value when value.Contains("[[p]]"):
                    model[key] = ReplaceParagraphs(value);
                    break;

                case IList<object> list:
                    for (int i = 0; i < list.Count; i++)
                    {
                        if (list[i] is IDictionary<string, object> itemmodel)
                        {
                            EnsureMultiline(itemmodel);
                        }
                        else if (list[i] is string itemvalue && itemvalue.Contains("[[p]]"))
                        {
                            list[i] = ReplaceParagraphs(itemvalue);
                        }
                    }
                    break;
            }
        }
    }

    /// <summary>
    /// Matches three or more consecutive line breaks (each either <c>\n</c> or <c>\r\n</c>).
    /// </summary>
    // RegexOptions.Compiled is omitted: the regex source generator ignores that flag.
    [GeneratedRegex(@"(\r?\n){3,}")]
    private static partial Regex MultilineExpr();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
using Spectre.Console; | ||
using YamlDotNet.Core; | ||
using YamlDotNet.Core.Events; | ||
using YamlDotNet.Serialization; | ||
using YamlDotNet.Serialization.TypeInspectors; | ||
|
||
namespace Clarius.OpenLaw; | ||
|
||
/// <summary>
/// YAML type converter that writes a <c>List&lt;object?&gt;</c> as a block-style sequence,
/// leaving out null items and emitting strings that contain a line feed in literal style.
/// Reading is not supported.
/// </summary>
public class YamlListConverter : IYamlTypeConverter
{
    /// <summary>Accepts any type assignable to <c>List&lt;object?&gt;</c>.</summary>
    public bool Accepts(Type type)
    {
        return typeof(List<object?>).IsAssignableFrom(type);
    }

    /// <summary>Not supported; always throws <see cref="NotImplementedException"/>.</summary>
    public object? ReadYaml(IParser parser, Type type, ObjectDeserializer rootDeserializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Emits <paramref name="value"/> as a sequence. Nothing is emitted when the value is null.
    /// Non-string items are delegated to <paramref name="serializer"/>.
    /// </summary>
    public void WriteYaml(IEmitter emitter, object? value, Type type, ObjectSerializer serializer)
    {
        if (value is null)
        {
            return;
        }

        var items = (List<object?>)value;
        emitter.Emit(new SequenceStart(null, null, false, SequenceStyle.Block));

        foreach (var element in items)
        {
            switch (element)
            {
                case null:
                    // Null entries are left out of the emitted sequence.
                    break;

                case string text:
                    emitter.Emit(CreateScalar(text));
                    break;

                default:
                    serializer.Invoke(element);
                    break;
            }
        }

        emitter.Emit(new SequenceEnd());
    }

    // Builds the scalar event for a string: literal style when it contains a line feed,
    // otherwise the default scalar for the text.
    private static Scalar CreateScalar(string text)
    {
        if (text.Contains('\n'))
        {
            return new Scalar(null, null, text, ScalarStyle.Literal, true, false);
        }

        return new Scalar(text);
    }
}
|
||
/// <summary>
/// YAML type converter that writes a <c>Dictionary&lt;string, object?&gt;</c> as a mapping,
/// leaving out entries whose value is null and emitting string values that contain a line
/// feed in literal style. Reading is not supported.
/// </summary>
public class YamlDictionaryConverter : IYamlTypeConverter
{
    /// <summary>Accepts any type assignable to <c>Dictionary&lt;string, object?&gt;</c>.</summary>
    public bool Accepts(Type type)
    {
        return typeof(Dictionary<string, object?>).IsAssignableFrom(type);
    }

    /// <summary>Not supported; always throws <see cref="NotImplementedException"/>.</summary>
    public object? ReadYaml(IParser parser, Type type, ObjectDeserializer rootDeserializer)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Emits <paramref name="value"/> as a mapping. Nothing is emitted when the value is null.
    /// Non-string values are delegated to <paramref name="serializer"/>.
    /// </summary>
    public void WriteYaml(IEmitter emitter, object? value, Type type, ObjectSerializer serializer)
    {
        if (value is null)
        {
            return;
        }

        var entries = (Dictionary<string, object?>)value;
        emitter.Emit(new MappingStart());

        foreach (var (name, content) in entries)
        {
            // Skip the whole entry (key included) when there is no value to write.
            if (content is null)
            {
                continue;
            }

            emitter.Emit(new Scalar(name));

            switch (content)
            {
                case string text:
                    emitter.Emit(CreateScalar(text));
                    break;

                default:
                    serializer.Invoke(content);
                    break;
            }
        }

        emitter.Emit(new MappingEnd());
    }

    // Builds the scalar event for a string: literal style when it contains a line feed,
    // otherwise the default scalar for the text.
    private static Scalar CreateScalar(string text)
    {
        if (text.Contains('\n'))
        {
            return new Scalar(null, null, text, ScalarStyle.Literal, true, false);
        }

        return new Scalar(text);
    }
}
Oops, something went wrong.