Skip to content

Commit

Permalink
Add conversion to YAML as well as Markdown for JSON
Browse files Browse the repository at this point in the history
This makes it easier to index and query the data depending on the target functionality.
  • Loading branch information
kzu committed Jan 31, 2025
1 parent e11e2f0 commit cefb718
Show file tree
Hide file tree
Showing 19 changed files with 11,870 additions and 26 deletions.
2 changes: 2 additions & 0 deletions src/Directory.props
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
<VersionSuffix Condition="$(VersionSuffix) == 'main'"></VersionSuffix>
<!-- CI variable with feed URL -->
<SLEET_FEED_URL Condition="$(SLEET_FEED_URL) == ''">https://clarius.blob.core.windows.net/nuget/index.json</SLEET_FEED_URL>

<UserSecretsId>bcfa6afd-d165-4505-b752-f00318604d0c</UserSecretsId>
</PropertyGroup>

</Project>
14 changes: 13 additions & 1 deletion src/OpenLaw/App.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Http.Resilience;
using Spectre.Console;
Expand All @@ -17,6 +18,15 @@ public static CommandApp Create(out IServiceProvider services)
{
var collection = new ServiceCollection();

var config = new ConfigurationBuilder()
.AddEnvironmentVariables()
.AddUserSecrets<TypeRegistrar>()
.AddDotNetConfig()
.Build();

collection.AddSingleton(config)
.AddSingleton<IConfiguration>(_ => config);

collection.AddHttpClient()
.ConfigureHttpClientDefaults(defaults => defaults.ConfigureHttpClient(http =>
{
Expand Down Expand Up @@ -58,12 +68,14 @@ public static CommandApp Create(out IServiceProvider services)

app.Configure(config =>
{
// configure commands
config.AddBranch("saij", saij =>
{
saij.AddCommand<DownloadCommand>("download");
});

config.AddCommand<ConvertCommand>("convert");
config.AddCommand<FormatCommand>("format");

if (Environment.GetEnvironmentVariables().Contains("NO_COLOR"))
config.Settings.HelpProviderStyles = null;
});
Expand Down
21 changes: 21 additions & 0 deletions src/OpenLaw/AssistantOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
namespace Clarius.OpenLaw;

/// <summary>
/// Options describing an assistant: its id, access key, endpoint and vector store.
/// Every property is <c>required</c>, so all four must be set when an instance is created.
/// </summary>
public class AssistantOptions
{
    /// <summary>Identifier of the assistant.</summary>
    public required string Id { get; set; }

    /// <summary>Access key (or token) used with the assistant.</summary>
    public required string Key { get; set; }

    /// <summary>Endpoint URI of the assistant, kept as a plain string.</summary>
    public required string Uri { get; set; }

    /// <summary>Identifier of the assistant's vector store.</summary>
    public required string Store { get; set; }
}
63 changes: 63 additions & 0 deletions src/OpenLaw/ConvertCommand.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using System.ComponentModel;
using System.Text.Json;
using Spectre.Console;
using Spectre.Console.Cli;
using YamlDotNet.Serialization;

namespace Clarius.OpenLaw;

/// <summary>
/// CLI command that converts JSON documents to YAML and Markdown by delegating to
/// <c>DictionaryConverter.ConvertFile</c>.
/// </summary>
[Description("Convierte archivos JSON a YAML y Markdown.")]
public class ConvertCommand(IAnsiConsole console) : Command<ConvertCommand.ConvertSettings>
{
    /// <summary>
    /// Converts the single file named in <paramref name="settings"/> when one was given;
    /// otherwise converts every <c>*.json</c> file found recursively under the configured
    /// directory, showing one progress task per file. Existing outputs are overwritten.
    /// </summary>
    /// <param name="context">The command context (not used).</param>
    /// <param name="settings">The parsed command-line settings.</param>
    /// <returns>Always <c>0</c>.</returns>
    public override int Execute(CommandContext context, ConvertSettings settings)
    {
        // Same "was a file given?" test as ConvertSettings.Validate, so a blank argument
        // falls through to directory mode instead of reaching the converter as an empty path.
        if (!string.IsNullOrWhiteSpace(settings.File))
        {
            ConvertFile(settings.File, true);
            return 0;
        }

        if (Directory.Exists(settings.Directory))
        {
            console.Progress()
                .Columns(
                [
                    new TaskDescriptionColumn(),
                    new ProgressBarColumn(),
                ])
                .Start(ctx =>
                {
                    Parallel.ForEach(Directory.EnumerateFiles(settings.Directory, "*.json", SearchOption.AllDirectories), file =>
                    {
                        var task = ctx.AddTask($"Convirtiendo {file}");
                        task.IsIndeterminate = true;
                        ConvertFile(file, true);
                        task.Value(100);
                    });
                });
        }

        return 0;
    }

    // Thin forwarder to the shared converter.
    static void ConvertFile(string file, bool overwrite) => DictionaryConverter.ConvertFile(file, overwrite);

    /// <summary>
    /// Settings for <see cref="ConvertCommand"/>: an optional single file, or a directory to scan.
    /// </summary>
    public class ConvertSettings : CommandSettings
    {
        /// <summary>
        /// Rejects a non-blank <see cref="File"/> that does not exist on disk.
        /// </summary>
        public override ValidationResult Validate()
        {
            // The message must be interpolated so the user sees the offending path
            // rather than the literal text "{File}".
            if (!string.IsNullOrWhiteSpace(File) && !System.IO.File.Exists(File))
            {
                return ValidationResult.Error($"El archivo especificado '{File}' no existe.");
            }

            return base.Validate();
        }

        /// <summary>Optional path of a single JSON file to convert.</summary>
        [Description("Archivo a convertir. Opcional.")]
        [CommandArgument(0, "[file]")]
        public string? File { get; set; }

        /// <summary>
        /// Directory scanned for JSON files when no file is given; defaults to the expanded
        /// <c>%AppData%\clarius\openlaw</c>.
        /// </summary>
        [Description("Ubicación de archivos a convertir. Por defecto '%AppData%\\clarius\\openlaw'")]
        [CommandOption("--dir")]
        public string Directory { get; set; } = Environment.ExpandEnvironmentVariables("%AppData%\\clarius\\openlaw");
    }
}
117 changes: 117 additions & 0 deletions src/OpenLaw/DictionaryConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
using System.Text;
using System.Text.Json;
using YamlDotNet.Serialization;

namespace Clarius.OpenLaw;

/// <summary>
/// Converts JSON documents into YAML and Markdown, working on a string-keyed
/// dictionary representation of the JSON.
/// </summary>
public class DictionaryConverter
{
    // JSON is read through JsonDictionaryConverter (declared elsewhere in the project).
    static readonly JsonSerializerOptions jsonOptions = new()
    {
        Converters = { new JsonDictionaryConverter() },
    };

    /// <summary>
    /// Writes a YAML and a Markdown rendition of <paramref name="jsonFile"/> into the
    /// <c>yaml</c> and <c>md</c> subdirectories next to it, creating those directories.
    /// </summary>
    /// <param name="jsonFile">Path of the JSON file to convert.</param>
    /// <param name="overwrite">
    /// When <c>false</c>, an output file that already exists is left untouched.
    /// </param>
    public static void ConvertFile(string jsonFile, bool overwrite)
    {
        var baseDir = Path.GetDirectoryName(jsonFile) ?? "";
        var fileName = Path.GetFileName(jsonFile);

        var yamlDir = Path.Combine(baseDir, "yaml");
        var yamlFile = Path.Combine(yamlDir, Path.ChangeExtension(fileName, ".yaml"));
        Directory.CreateDirectory(yamlDir);

        var mdDir = Path.Combine(baseDir, "md");
        var mdFile = Path.Combine(mdDir, Path.ChangeExtension(fileName, ".md"));
        Directory.CreateDirectory(mdDir);

        // Parsed at most once and shared by both outputs.
        Dictionary<string, object?>? content = null;

        if (overwrite || !File.Exists(yamlFile))
        {
            content = Parse(File.ReadAllText(jsonFile));
            if (content is null)
            {
                return;
            }

            File.WriteAllText(yamlFile, ToYaml(content), Encoding.UTF8);
        }

        if (!overwrite && File.Exists(mdFile))
        {
            return;
        }

        content ??= Parse(File.ReadAllText(jsonFile));
        if (content is null)
        {
            return;
        }

        File.WriteAllText(mdFile, ToMarkdown(content), Encoding.UTF8);
    }

    /// <summary>
    /// Deserializes <paramref name="json"/> into a dictionary using the converter options above.
    /// </summary>
    /// <returns>The deserialized dictionary, or <c>null</c> when the serializer yields none.</returns>
    public static Dictionary<string, object?>? Parse(string json)
    {
        return JsonSerializer.Deserialize<Dictionary<string, object?>>(json, jsonOptions);
    }

    /// <summary>
    /// Serializes <paramref name="dictionary"/> to YAML with the project's dictionary and
    /// list type converters registered.
    /// </summary>
    public static string ToYaml(Dictionary<string, object?> dictionary)
        => new SerializerBuilder()
            .WithTypeConverter(new YamlDictionaryConverter())
            .WithTypeConverter(new YamlListConverter())
            .Build()
            .Serialize(dictionary);

    /// <summary>
    /// Renders <paramref name="dictionary"/> as Markdown: titles become headings and
    /// <c>texto</c> values become paragraphs.
    /// </summary>
    public static string ToMarkdown(Dictionary<string, object?> dictionary)
    {
        var markdown = new StringBuilder();
        ProcessDictionary(0, dictionary, markdown);
        return markdown.ToString();
    }

    // Recurses into nested dictionaries and lists; any other value is ignored.
    static void ProcessObject(int depth, object? obj, StringBuilder output)
    {
        switch (obj)
        {
            case Dictionary<string, object?> nested:
                ProcessDictionary(depth, nested, output);
                break;

            case List<object?> items:
                foreach (var element in items)
                {
                    ProcessObject(depth, element, output);
                }

                break;
        }
    }

    // Emits a heading for the first "titulo-*" entry (if it has a value), then walks all entries.
    static void ProcessDictionary(int depth, Dictionary<string, object?> dictionary, StringBuilder output)
    {
        var title = dictionary
            .FirstOrDefault(entry => entry.Key.StartsWith("titulo-", StringComparison.OrdinalIgnoreCase))
            .Value;

        var hasTitle = title is not null;

        // A titled dictionary sits one heading level deeper, and so do its children.
        var level = hasTitle ? depth + 1 : depth;

        if (hasTitle)
        {
            output.AppendLine().AppendLine($"{new string('#', level)} {title}");
        }

        // We may have section title with text without an article #
        var rendersText = hasTitle || dictionary.ContainsKey("numero-articulo");

        foreach (var (key, value) in dictionary)
        {
            if (value is null)
            {
                continue;
            }

            if (key == "texto" && rendersText)
            {
                output.AppendLine().AppendLine(value.ToString());
            }
            else
            {
                ProcessObject(level, value, output);
            }
        }
    }
}
30 changes: 24 additions & 6 deletions src/OpenLaw/DownloadCommand.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
using System.IO;
using System.ComponentModel;
using System.Text;
using System.Text.Json;
using CliWrap;
using CliWrap.Buffered;
using Devlooped;
using Microsoft.Extensions.Options;
using Spectre.Console;
using Spectre.Console.Cli;

namespace Clarius.OpenLaw;

[Description("Descargar documentos del sistema SAIJ.")]
public class DownloadCommand(IAnsiConsole console, IHttpClientFactory http) : AsyncCommand<DownloadSettings>
{
static readonly JsonSerializerOptions options = new(JsonSerializerDefaults.Web)
static readonly JsonSerializerOptions readOptions = new()
{
WriteIndented = true
Converters = { new JsonDictionaryConverter() },
};

static readonly JsonSerializerOptions writeOptions = new()
{
WriteIndented = true,
};

public override async Task<int> ExecuteAsync(CommandContext context, DownloadSettings settings)
Expand Down Expand Up @@ -49,16 +54,29 @@ await console.Progress()
var file = Path.Combine(settings.Directory, id + ".json");
// Skip if file exists and has the same timestamp
if (File.Exists(file) && await GetJsonTimestampAsync(file) == timestamp)
{
// Source json file hasn't changed, so only convert if requested
if (settings.Convert)
// Don't force conversion if file already exists.
ConvertFile(file, overwrite: false);

continue;
}

File.WriteAllText(file, doc.ToJsonString(options));
// Converting to dictionary performs string multiline formatting and markup removal
var dictionary = JsonSerializer.Deserialize<Dictionary<string, object?>>(doc, readOptions);
File.WriteAllText(file, JsonSerializer.Serialize(dictionary, writeOptions));
if (settings.Convert)
ConvertFile(file, overwrite: true);
}
});

return 0;
}

async Task<long> GetJsonTimestampAsync(string file)
static void ConvertFile(string file, bool overwrite) => DictionaryConverter.ConvertFile(file, overwrite);

static async Task<long> GetJsonTimestampAsync(string file)
{
var jq = await Cli.Wrap(JQ.Path)
.WithArguments([".document.metadata.timestamp", file, "-r"])
Expand Down
12 changes: 11 additions & 1 deletion src/OpenLaw/DownloadSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@ public class DownloadSettings : CommandSettings
[CommandOption("--all")]
public bool All { get; set; } = false;

[Description("Ubicación opcional para descarga de archivos.")]
[Description("Forzar formato en documentos existentes.")]
[DefaultValue(false)]
[CommandOption("--ff", IsHidden = true)]
public bool ForceFormat { get; set; } = false;

/// <summary>
/// Whether downloaded documents are also passed to the converter.
/// </summary>
// NOTE(review): [DefaultValue(true)] and the `= false` initializer disagree on the default.
// Presumably the attribute value is what the CLI binder applies when --convert is omitted;
// confirm which default is intended and make the two match.
[Description("Convertir automaticamente documentos nuevos descargados a YAML.")]
[DefaultValue(true)]
[CommandOption("--convert")]
public bool Convert { get; set; } = false;

[Description("Ubicación opcional para descarga de archivos. Por defecto '%AppData%\\clarius\\openlaw'")]
[CommandOption("--dir")]
public string Directory { get; set; } = Environment.ExpandEnvironmentVariables("%AppData%\\clarius\\openlaw");
}
61 changes: 61 additions & 0 deletions src/OpenLaw/FormatCommand.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
using System.ComponentModel;
using System.Text.Json;
using Spectre.Console;
using Spectre.Console.Cli;

namespace Clarius.OpenLaw;

/// <summary>
/// CLI command that rewrites JSON files in place: each file is read into a dictionary
/// and serialized back indented, using the relaxed JSON encoder.
/// </summary>
[Description("Normaliza el formato de archivos JSON.")]
public class FormatCommand(IAnsiConsole console) : Command<FormatCommand.FormatSettings>
{
    // Options for reading: JSON goes through JsonDictionaryConverter (declared elsewhere).
    static readonly JsonSerializerOptions readOptions = new()
    {
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
        Converters = { new JsonDictionaryConverter() },
    };

    // Options for writing: indented output with relaxed escaping.
    static readonly JsonSerializerOptions writeOptions = new()
    {
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
        WriteIndented = true,
    };

    /// <summary>
    /// Settings for <see cref="FormatCommand"/>.
    /// </summary>
    public class FormatSettings : CommandSettings
    {
        /// <summary>
        /// Directory scanned for JSON files; defaults to the expanded
        /// <c>%AppData%\clarius\openlaw</c>.
        /// </summary>
        [Description("Ubicación opcional para descarga de archivos. Por defecto '%AppData%\\clarius\\openlaw'")]
        [CommandOption("--dir")]
        public string Directory { get; set; } = Environment.ExpandEnvironmentVariables("%AppData%\\clarius\\openlaw");
    }

    /// <summary>
    /// Reformats every <c>*.json</c> file found recursively under the configured directory,
    /// showing one progress task per file. Does nothing when the directory does not exist.
    /// </summary>
    /// <returns>Always <c>0</c>.</returns>
    public override int Execute(CommandContext context, FormatSettings settings)
    {
        if (!Directory.Exists(settings.Directory))
        {
            return 0;
        }

        var progress = console.Progress().Columns(
        [
            new TaskDescriptionColumn(),
            new ProgressBarColumn(),
        ]);

        progress.Start(ctx =>
        {
            var jsonFiles = Directory.EnumerateFiles(settings.Directory, "*.json", SearchOption.AllDirectories);

            Parallel.ForEach(jsonFiles, path =>
            {
                var step = ctx.AddTask($"Formateando {path}");
                step.IsIndeterminate = true;
                FormatFile(path);
                step.Value(100);
            });
        });

        return 0;
    }

    // Round-trips one file through the dictionary representation and overwrites it.
    void FormatFile(string file)
    {
        var original = File.ReadAllText(file);
        var content = JsonSerializer.Deserialize<Dictionary<string, object?>>(original, readOptions);
        var formatted = JsonSerializer.Serialize(content, writeOptions);
        File.WriteAllText(file, formatted);
    }
}
Loading

0 comments on commit cefb718

Please sign in to comment.