Skip to content

Commit

Permalink
imp - prf - Faster unicode lookups!
Browse files Browse the repository at this point in the history
---

We've boosted the speed of Unicode lookups by using a "streaming" XML method. This allows applications that rely on this information to run faster than before!

---

Type: imp
Breaking: False
Doc Required: False
Backport Required: False
Part: 1/1
  • Loading branch information
AptiviCEO committed Sep 16, 2024
1 parent 4e2db6a commit 012247d
Show file tree
Hide file tree
Showing 3 changed files with 319 additions and 82 deletions.
152 changes: 95 additions & 57 deletions Textify.Data.Analysis/Unicode/UnicodeCharDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,38 +22,17 @@

namespace Textify.Data.Analysis.Unicode
{
/// <summary>
/// Name alias entry, populated by XML serialization from the "alias" and "type" attributes.
/// </summary>
[XmlRoot(ElementName = "name-alias")]
public class Namealias
{

/// <summary>
/// Alias name, mapped to the "alias" XML attribute. Defaults to an empty string.
/// </summary>
[XmlAttribute(AttributeName = "alias")]
public string Alias { get; set; } = "";

/// <summary>
/// Alias type, mapped to the "type" XML attribute. Defaults to an empty string.
/// </summary>
[XmlAttribute(AttributeName = "type")]
public string Type { get; set; } = "";
}

/// <summary>
/// Character
/// </summary>
[XmlRoot(ElementName = "char")]
public class Char
public class UnicodeCharInfo
{

/// <summary>
/// Name aliases
/// </summary>
[XmlElement(ElementName = "namealias")]
public List<Namealias> Namealias { get; set; } = [];
public Namealias[] Namealias { get; set; } = [];

/// <summary>
/// Codepage number
Expand Down Expand Up @@ -721,6 +700,42 @@ public class Char
[XmlAttribute(AttributeName = "ExtPict")]
public string ExtPict { get; set; } = "";

/// <summary>
/// NFKC_SCF?
/// </summary>
[XmlAttribute(AttributeName = "NFKC_SCF")]
public string NFKCSCF { get; set; } = "";

/// <summary>
/// ID_Compat_Math_Start
/// </summary>
[XmlAttribute(AttributeName = "ID_Compat_Math_Start")]
public string IdCompatMathStart { get; set; } = "";

/// <summary>
/// ID_Compat_Math_Continue
/// </summary>
[XmlAttribute(AttributeName = "ID_Compat_Math_Continue")]
public string IdCompatMathContinue { get; set; } = "";

/// <summary>
/// IDSU?
/// </summary>
[XmlAttribute(AttributeName = "IDSU")]
public string IDSU { get; set; } = "";

/// <summary>
/// InCB?
/// </summary>
[XmlAttribute(AttributeName = "InCB")]
public string InCB { get; set; } = "";

/// <summary>
/// MCM?
/// </summary>
[XmlAttribute(AttributeName = "MCM")]
public string MCM { get; set; } = "";

// Unihan info

/// <summary>
Expand Down Expand Up @@ -825,6 +840,12 @@ public class Char
[XmlAttribute(AttributeName = "kMandarin")]
public string KMandarin { get; set; } = "";

/// <summary>
/// CihaiT
/// </summary>
[XmlAttribute(AttributeName = "kCihaiT")]
public string KCihaiT { get; set; } = "";

/// <summary>
/// SBGY
/// </summary>
Expand All @@ -837,6 +858,12 @@ public class Char
[XmlAttribute(AttributeName = "kCangjie")]
public string KCangjie { get; set; } = "";

/// <summary>
/// Kang Xi
/// </summary>
[XmlAttribute(AttributeName = "kKangXi")]
public string KKangXi { get; set; } = "";

/// <summary>
/// Hanyu Pinyin
/// </summary>
Expand All @@ -855,12 +882,48 @@ public class Char
[XmlAttribute(AttributeName = "kIRGKangXi")]
public double KIRGKangXi { get; set; }

/// <summary>
/// Morohashi
/// </summary>
[XmlAttribute(AttributeName = "kMorohashi")]
public string KMorohashi { get; set; } = "";

/// <summary>
/// Total strokes
/// </summary>
[XmlAttribute(AttributeName = "kTotalStrokes")]
public string KTotalStrokes { get; set; } = "";

/// <summary>
/// Japanese
/// </summary>
[XmlAttribute(AttributeName = "kJapanese")]
public string KJapanese { get; set; } = "";

/// <summary>
/// Moji Joho
/// </summary>
[XmlAttribute(AttributeName = "kMojiJoho")]
public string KMojiJoho { get; set; } = "";

/// <summary>
/// Fanqie
/// </summary>
[XmlAttribute(AttributeName = "kFanqie")]
public string KFanqie { get; set; } = "";

/// <summary>
/// Strange
/// </summary>
[XmlAttribute(AttributeName = "kStrange")]
public string KStrange { get; set; } = "";

/// <summary>
/// RS Adobe Japan 1.6
/// </summary>
[XmlAttribute(AttributeName = "kRSAdobe_Japan1_6")]
public string KRSAdobeJapan16 { get; set; } = "";

/// <summary>
/// Cantonese
/// </summary>
Expand All @@ -875,46 +938,21 @@ public class Char
}

/// <summary>
/// Character repertoire
/// </summary>
[XmlRoot(ElementName = "repertoire")]
public class Repertoire
{
/// <summary>
/// List of characters
/// </summary>
[XmlElement(ElementName = "char")]
public Char[] Char { get; set; } = [];
}

/// <summary>
/// Unicode Database
/// Name alias
/// </summary>
[XmlRoot(ElementName = "ucd")]
public class Ucd
[XmlRoot(ElementName = "name-alias")]
public class Namealias
{
/// <summary>
/// Unicode version
/// </summary>
[XmlElement(ElementName = "description")]
public string Description { get; set; } = "";

/// <summary>
/// The Repertoire
/// </summary>
[XmlElement(ElementName = "repertoire")]
public Repertoire? Repertoire { get; set; }

/// <summary>
/// XML namespace
/// Alias name
/// </summary>
[XmlAttribute(AttributeName = "xmlns")]
public string Xmlns { get; set; } = "";
[XmlAttribute(AttributeName = "alias")]
public string Alias { get; set; } = "";

/// <summary>
/// Text
/// Alias type
/// </summary>
[XmlText]
public string Text { get; set; } = "";
[XmlAttribute(AttributeName = "type")]
public string Type { get; set; } = "";
}
}
8 changes: 4 additions & 4 deletions Textify.Data.Analysis/Unicode/UnicodeQuery.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,30 @@ public static class UnicodeQuery
/// Queries the character
/// </summary>
/// <param name="character">Character</param>
public static Char QueryChar(char character) =>
public static UnicodeCharInfo QueryChar(char character) =>
QueryChar(Convert.ToInt32(character), UnicodeQueryType.Full);

/// <summary>
/// Queries the character
/// </summary>
/// <param name="charNum">Character number</param>
public static Char QueryChar(int charNum) =>
public static UnicodeCharInfo QueryChar(int charNum) =>
QueryChar(charNum, UnicodeQueryType.Full);

/// <summary>
/// Queries the character
/// </summary>
/// <param name="character">Character</param>
/// <param name="type">Database type to query</param>
public static Char QueryChar(char character, UnicodeQueryType type) =>
public static UnicodeCharInfo QueryChar(char character, UnicodeQueryType type) =>
QueryChar(Convert.ToInt32(character), type);

/// <summary>
/// Queries the character
/// </summary>
/// <param name="charNum">Character number</param>
/// <param name="type">Database type to query</param>
public static Char QueryChar(int charNum, UnicodeQueryType type) =>
public static UnicodeCharInfo QueryChar(int charNum, UnicodeQueryType type) =>
UnicodeQueryHandler.Serialize(charNum, type);
}
}
Loading

0 comments on commit 012247d

Please sign in to comment.