diff --git a/Textify.Tests/SpaceManager/ConversionTests.cs b/Textify.Tests/SpaceManager/ConversionTests.cs
index c4ab511..2581a79 100644
--- a/Textify.Tests/SpaceManager/ConversionTests.cs
+++ b/Textify.Tests/SpaceManager/ConversionTests.cs
@@ -267,6 +267,104 @@ public void TestConvertSpacesMultipleDifferentSpacesExplicitToText()
result.ShouldBe(expectedResult);
}
+ [TestMethod]
+ public void TestConvertSpacesSimpleNormalToText()
+ {
+ // The separator between "Hello" and "world!" is a normal space (U+0020), so the text must come back unchanged
+ string text = "Hello world!";
+ string expectedResult = "Hello world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleNonBreakingSpaceToText()
+ {
+ // The separator in the input below is meant to be a literal non-breaking space (U+00A0); it must come back as a normal space
+ string text = "Hello world!";
+ string expectedResult = "Hello world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleMultipleNonBreakingSpacesToText()
+ {
+ // Every separator in the input below is meant to be a literal non-breaking space (U+00A0); each must come back as a normal space
+ string text = "Hello and welcome to the world!";
+ string expectedResult = "Hello and welcome to the world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleNonBreakingSpaceExplicitToText()
+ {
+ // The \u00a0 escape in the input is a non-breaking space; it must come back as a normal space
+ string text = "Hello\u00a0world!";
+ string expectedResult = "Hello world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleMultipleNonBreakingSpacesExplicitToText()
+ {
+ // Every \u00a0 escape in the input is a non-breaking space; each must come back as a normal space
+ string text = "Hello\u00a0and\u00a0welcome\u00a0to\u00a0the\u00a0world!";
+ string expectedResult = "Hello and welcome to the world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleWithBadSpacesExplicitToText()
+ {
+ foreach (var badSpace in Spaces.badSpaces)
+ {
+ char whiteSpace = Encoding.UTF8.GetString(badSpace.Value)[0];
+
+ // {whiteSpace} is the current bad space (first char decoded from the entry's UTF-8 bytes); it must come back as a normal space
+ string text = $"Hello{whiteSpace}world!";
+ string expectedResult = "Hello world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleWithMultipleBadSpacesExplicitToText()
+ {
+ foreach (var badSpace in Spaces.badSpaces)
+ {
+ char whiteSpace = Encoding.UTF8.GetString(badSpace.Value)[0];
+
+ // Every {whiteSpace} is the current bad space (first char decoded from the entry's UTF-8 bytes); each must come back as a normal space
+ string text = $"Hello{whiteSpace}and{whiteSpace}welcome{whiteSpace}to{whiteSpace}the{whiteSpace}world!";
+ string expectedResult = "Hello and welcome to the world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+ }
+
+ [TestMethod]
+ public void TestConvertSpacesSimpleMultipleDifferentSpacesExplicitToText()
+ {
+ // The escapes mix different bad spaces: non-breaking (\u00a0), zero width (\u200B), and punctuation (\u2008); each must come back as a normal space
+ string text = "Hello\u00a0and\u200Bwelcome\u2008to\u200Bthe\u00a0world!";
+ string expectedResult = "Hello and welcome to the world!";
+ string result = SpaceConversionTools.ConvertSpacesSimple(text);
+ result.ShouldNotBeNullOrEmpty();
+ result.ShouldBe(expectedResult);
+ }
+
[TestMethod]
public void TestConvertSpacesNormalToStream()
{
diff --git a/Textify/SpaceManager/Conversion/SpaceConversionTools.cs b/Textify/SpaceManager/Conversion/SpaceConversionTools.cs
index ed6ad4c..fc0c6be 100644
--- a/Textify/SpaceManager/Conversion/SpaceConversionTools.cs
+++ b/Textify/SpaceManager/Conversion/SpaceConversionTools.cs
@@ -22,6 +22,7 @@
using System.IO;
using System.Linq;
using System.Text;
+using Textify.General;
using Textify.SpaceManager.Analysis;
namespace Textify.SpaceManager.Conversion
@@ -139,5 +140,18 @@ public static void ConvertSpacesTo(SpaceAnalysisResult analysisResult, Stream st
var bytes = ConvertSpaces(analysisResult);
stream.Write(bytes, 0, bytes.Length);
}
+
+ /// <summary>
+ /// Converts spaces to true spaces simply without any analysis result, and returns the converted text
+ /// </summary>
+ /// <param name="text">Target text to work on; a null or empty value throws <see cref="ArgumentNullException"/></param>
+ public static string ConvertSpacesSimple(string text)
+ {
+ if (string.IsNullOrEmpty(text))
+ throw new ArgumentNullException(nameof(text));
+
+ // Swap the bad space strings listed in Spaces.badSpaceChars for a normal space (U+0020)
+ return text.ReplaceAll(Spaces.badSpaceChars, " ");
+ }
}
}
diff --git a/Textify/SpaceManager/Spaces.cs b/Textify/SpaceManager/Spaces.cs
index 08bfdf7..3004c9b 100644
--- a/Textify/SpaceManager/Spaces.cs
+++ b/Textify/SpaceManager/Spaces.cs
@@ -24,31 +24,42 @@ namespace Textify.SpaceManager
{
internal static class Spaces
{
+ // The characters treated as bad spaces, each stored as a one-character string.
+ // The order is significant: badSpaces below reads these entries by index, so append new
+ // characters at the end (or update those indexes) rather than inserting or reordering.
+ internal static readonly string[] badSpaceChars =
+ [
+ "\u0009", "\u00a0", "\u1680", "\u2000", "\u2001", "\u2002", "\u2003", "\u2004",
+ "\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200A", "\u202F", "\u205F",
+ "\u3000", "\u180E", "\u200B", "\u200C", "\u200D", "\u2060", "\uFEFF",
+ ];
+
+ // Maps a descriptive name of each bad space to that character's UTF-8 encoded bytes.
internal static readonly Dictionary<string, byte[]> badSpaces = new()
{
- { "CHARACTER TABULATION", Encoding.UTF8.GetBytes("\u0009") },
- { "NON-BREAKING SPACE", Encoding.UTF8.GetBytes("\u00a0") },
- { "OGHAM SPACE MARK", Encoding.UTF8.GetBytes("\u1680") },
- { "EN QUAD", Encoding.UTF8.GetBytes("\u2000") },
- { "EM QUAD", Encoding.UTF8.GetBytes("\u2001") },
- { "EN SPACE", Encoding.UTF8.GetBytes("\u2002") },
- { "EM SPACE", Encoding.UTF8.GetBytes("\u2003") },
- { "THREE-PER-EM SPACE", Encoding.UTF8.GetBytes("\u2004") },
- { "FOUR-PER-EM SPACE", Encoding.UTF8.GetBytes("\u2005") },
- { "SIX-PER-EM SPACE", Encoding.UTF8.GetBytes("\u2006") },
- { "FIGURE SPACE", Encoding.UTF8.GetBytes("\u2007") },
- { "PUNCTUATION SPACE", Encoding.UTF8.GetBytes("\u2008") },
- { "THIN SPACE", Encoding.UTF8.GetBytes("\u2009") },
- { "HAIR SPACE", Encoding.UTF8.GetBytes("\u200A") },
- { "NARROW NON-BREAKING SPACE", Encoding.UTF8.GetBytes("\u202F") },
- { "MEDIUM MATHEMATICAL SPACE", Encoding.UTF8.GetBytes("\u205F") },
- { "IDEOGRAPHIC SPACE", Encoding.UTF8.GetBytes("\u3000") },
- { "MONGOLIAN VOWEL SEPARATOR", Encoding.UTF8.GetBytes("\u180E") },
- { "ZERO WIDTH SPACE", Encoding.UTF8.GetBytes("\u200B") },
- { "ZERO WIDTH NON-JOINER", Encoding.UTF8.GetBytes("\u200C") },
- { "ZERO WIDTH JOINER", Encoding.UTF8.GetBytes("\u200D") },
- { "WORD JOINER", Encoding.UTF8.GetBytes("\u2060") },
- { "ZERO WIDTH NON-BREAKING SPACE", Encoding.UTF8.GetBytes("\uFEFF") },
+ { "CHARACTER TABULATION", Encoding.UTF8.GetBytes(badSpaceChars[0]) },
+ { "NON-BREAKING SPACE", Encoding.UTF8.GetBytes(badSpaceChars[1]) },
+ { "OGHAM SPACE MARK", Encoding.UTF8.GetBytes(badSpaceChars[2]) },
+ { "EN QUAD", Encoding.UTF8.GetBytes(badSpaceChars[3]) },
+ { "EM QUAD", Encoding.UTF8.GetBytes(badSpaceChars[4]) },
+ { "EN SPACE", Encoding.UTF8.GetBytes(badSpaceChars[5]) },
+ { "EM SPACE", Encoding.UTF8.GetBytes(badSpaceChars[6]) },
+ { "THREE-PER-EM SPACE", Encoding.UTF8.GetBytes(badSpaceChars[7]) },
+ { "FOUR-PER-EM SPACE", Encoding.UTF8.GetBytes(badSpaceChars[8]) },
+ { "SIX-PER-EM SPACE", Encoding.UTF8.GetBytes(badSpaceChars[9]) },
+ { "FIGURE SPACE", Encoding.UTF8.GetBytes(badSpaceChars[10]) },
+ { "PUNCTUATION SPACE", Encoding.UTF8.GetBytes(badSpaceChars[11]) },
+ { "THIN SPACE", Encoding.UTF8.GetBytes(badSpaceChars[12]) },
+ { "HAIR SPACE", Encoding.UTF8.GetBytes(badSpaceChars[13]) },
+ { "NARROW NON-BREAKING SPACE", Encoding.UTF8.GetBytes(badSpaceChars[14]) },
+ { "MEDIUM MATHEMATICAL SPACE", Encoding.UTF8.GetBytes(badSpaceChars[15]) },
+ { "IDEOGRAPHIC SPACE", Encoding.UTF8.GetBytes(badSpaceChars[16]) },
+ { "MONGOLIAN VOWEL SEPARATOR", Encoding.UTF8.GetBytes(badSpaceChars[17]) },
+ { "ZERO WIDTH SPACE", Encoding.UTF8.GetBytes(badSpaceChars[18]) },
+ { "ZERO WIDTH NON-JOINER", Encoding.UTF8.GetBytes(badSpaceChars[19]) },
+ { "ZERO WIDTH JOINER", Encoding.UTF8.GetBytes(badSpaceChars[20]) },
+ { "WORD JOINER", Encoding.UTF8.GetBytes(badSpaceChars[21]) },
+ { "ZERO WIDTH NON-BREAKING SPACE", Encoding.UTF8.GetBytes(badSpaceChars[22]) },
};
}
}