diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index df6c6c7b..1e111b38 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -15,17 +15,16 @@ public ParatextBackupTermsCorpus( using (var archive = ZipFile.OpenRead(fileName)) { ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); - IEnumerable<(string, IEnumerable)> glosses = new ZipParatextTermsParser(archive).Parse( - settings, - termCategories, - useTermGlosses - ); + IEnumerable<(string, IReadOnlyList)> glosses = new ZipParatextProjectTermsParser( + archive, + settings + ).Parse(termCategories, useTermGlosses); string textId = $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; IText text = new MemoryText( textId, - glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) + glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2 }) ); AddText(text); } diff --git a/src/SIL.Machine/Corpora/ParatextTermsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs similarity index 88% rename from src/SIL.Machine/Corpora/ParatextTermsParserBase.cs rename to src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs index c58d2db8..ec68b400 100644 --- a/src/SIL.Machine/Corpora/ParatextTermsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Collections.Immutable; using System.IO; using System.Linq; using System.Reflection; @@ -9,7 +10,7 @@ namespace SIL.Machine.Corpora { - public abstract class ParatextTermsParserBase + public abstract class ParatextProjectTermsParserBase { private static readonly List PredefinedTermsListTypes = new List() { @@ -34,19 +35,30 @@ public abstract class ParatextTermsParserBase private static readonly Regex ContentInBracketsRegex = new Regex(@"^\[(.+?)\]$", RegexOptions.Compiled); private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled); - public IEnumerable<(string, IEnumerable)> Parse( - ParatextProjectSettings settings, + private readonly ParatextProjectSettings _settings; + + protected ParatextProjectTermsParserBase(ParatextProjectSettings settings) + { + _settings = settings; + } + + protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase settingsParser) + { + _settings = settingsParser.Parse(); + } + + public IEnumerable<(string TermId, IReadOnlyList Glosses)> Parse( IEnumerable termCategories, bool useTermGlosses = true ) { XDocument biblicalTermsDoc; IDictionary termIdToCategoryDictionary; - if (settings.BiblicalTermsListType == "Project") + if (_settings.BiblicalTermsListType == "Project") { - if (Exists(settings.BiblicalTermsFileName)) + if (Exists(_settings.BiblicalTermsFileName)) { - using (Stream keyTermsFile = Open(settings.BiblicalTermsFileName)) + using (Stream keyTermsFile = Open(_settings.BiblicalTermsFileName)) { biblicalTermsDoc = XDocument.Load(keyTermsFile); termIdToCategoryDictionary = GetCategoryPerId(biblicalTermsDoc); @@ -65,12 +77,12 @@ public abstract class ParatextTermsParserBase } } } - else if (PredefinedTermsListTypes.Contains(settings.BiblicalTermsListType)) + else if (PredefinedTermsListTypes.Contains(_settings.BiblicalTermsListType)) { using ( Stream keyTermsFile = Assembly .GetExecutingAssembly() - .GetManifestResourceStream("SIL.Machine.Corpora." + settings.BiblicalTermsFileName) + .GetManifestResourceStream("SIL.Machine.Corpora." + _settings.BiblicalTermsFileName) ) { biblicalTermsDoc = XDocument.Load(keyTermsFile); @@ -84,9 +96,9 @@ public abstract class ParatextTermsParserBase XDocument termsGlossesDoc = null; if ( - settings.LanguageCode != null - && settings.BiblicalTermsListType == "Major" - && SupportedLanguageTermsLocalizationXmls.TryGetValue(settings.LanguageCode, out string resourceName) + _settings.LanguageCode != null + && _settings.BiblicalTermsListType == "Major" + && SupportedLanguageTermsLocalizationXmls.TryGetValue(_settings.LanguageCode, out string resourceName) ) { using (Stream keyTermsFile = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)) @@ -147,9 +159,9 @@ public abstract class ParatextTermsParserBase { return termsRenderings .Concat(termsGlosses.Where(kvp => !termsRenderings.ContainsKey(kvp.Key))) - .Select(kvp => (kvp.Key, kvp.Value)); + .Select(kvp => (kvp.Key, (IReadOnlyList)kvp.Value.ToList())); } - return new List<(string, IEnumerable)>(); + return new List<(string, IReadOnlyList)>(); } private static bool IsInCategory( diff --git a/src/SIL.Machine/Corpora/ZipParatextTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs similarity index 67% rename from src/SIL.Machine/Corpora/ZipParatextTermsParser.cs rename to src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index d41f2477..863cb563 100644 --- a/src/SIL.Machine/Corpora/ZipParatextTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -3,11 +3,12 @@ namespace SIL.Machine.Corpora { - public class ZipParatextTermsParser : ParatextTermsParserBase + public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { private readonly ZipArchive _archive; - public ZipParatextTermsParser(ZipArchive archive) + public ZipParatextProjectTermsParser(ZipArchive archive, ParatextProjectSettings settings = null) + : base(settings ?? new ZipParatextProjectSettingsParser(archive).Parse()) { _archive = archive; } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs similarity index 73% rename from tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs rename to tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs index d4bfc594..7fb93798 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryProjectTermsParser.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs @@ -2,7 +2,8 @@ namespace SIL.Machine.Corpora; -public class MemoryParatextTermsParser(IDictionary files) : ParatextTermsParserBase +public class MemoryParatextProjectTermsParser(ParatextProjectSettings settings, IDictionary files) + : ParatextProjectTermsParserBase(settings) { public IDictionary Files { get; } = files; diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextBackupTermsCorpus.cs b/tests/SIL.Machine.Tests/Corpora/ParatextBackupTermsCorpus.cs new file mode 100644 index 00000000..eb66f04c --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/ParatextBackupTermsCorpus.cs @@ -0,0 +1,17 @@ +using NUnit.Framework; + +namespace SIL.Machine.Corpora; + +[TestFixture] +public class ParatextBackupTermsCorpusTests +{ + [Test] + public void CreateCorpus() + { + string backupDir = CorporaTestHelpers.CreateTestParatextBackup(); + var corpus = new ParatextBackupTermsCorpus(backupDir, new string[] { "PN" }, true); + IList rows = corpus.GetRows().ToList(); + Assert.That(rows.Count, Is.EqualTo(1)); + Assert.That(rows.First().Text, Is.EqualTo("Xerxes")); + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs deleted file mode 100644 index 740d17db..00000000 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs +++ /dev/null @@ -1,26 +0,0 @@ -namespace SIL.Machine.Corpora; - -public class ParatextProjectTermsCorpus : DictionaryTextCorpus -{ - public ParatextProjectTermsCorpus( - IDictionary files, - ParatextProjectSettings settings, - IEnumerable termCategories, - bool useTermGlosses = true - ) - { - IEnumerable<(string, IEnumerable)> glosses = new MemoryParatextTermsParser(files).Parse( - settings, - termCategories, - useTermGlosses - ); - string textId = - $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; - - IText text = new MemoryText( - textId, - glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() }) - ); - AddText(text); - } -} diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs similarity index 72% rename from tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs rename to tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs index 59a91f7c..df1409fb 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextTermsCorpusTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs @@ -5,7 +5,7 @@ namespace SIL.Machine.Corpora; [TestFixture] -public class ParatextTermsCorpusTests +public class ParatextProjectTermsParserTests { [Test] public void TestGetKeyTermsFromTermsRenderings() @@ -38,9 +38,9 @@ public void TestGetKeyTermsFromTermsRenderings() } } ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(1)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Xerxes")); + IEnumerable<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses(); + Assert.That(terms.Count, Is.EqualTo(1)); + Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Xerxes")); } [Test] @@ -53,9 +53,9 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() ), useTermGlosses: true ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(5726)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha")); + IEnumerable<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses(); + Assert.That(terms.Count, Is.EqualTo(5726)); + Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Abagtha")); } [Test] @@ -68,8 +68,8 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermG ), useTermGlosses: false ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(0)); + IEnumerable<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses(); + Assert.That(terms.Count, Is.EqualTo(0)); } [Test] @@ -82,9 +82,9 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_PreferLocaliz ), useTermGlosses: true ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(5726)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha")); + IEnumerable<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses(); + Assert.That(terms.Count, Is.EqualTo(5726)); + Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Abagtha")); } [Test] @@ -98,9 +98,9 @@ public void TestGetKeyTermsFromTermsLocalizations_() ), useTermGlosses: true ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(5715)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Aaron")); + IEnumerable<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses(); + Assert.That(terms.Count, Is.EqualTo(5715)); + Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Aaron")); } [Test] @@ -129,10 +129,10 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc }, useTermGlosses: true ); - IList rows = env.Corpus.GetRows().ToList(); - Assert.That(rows.Count, Is.EqualTo(5726)); - Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Xerxes")); - Assert.That(string.Join(" ", rows[2].Segment), Is.EqualTo("Abi")); + IReadOnlyList<(string TermId, IReadOnlyList Glosses)> terms = env.GetGlosses().ToList(); + Assert.That(terms.Count, Is.EqualTo(5726)); + Assert.That(string.Join(" ", terms[1].Glosses), Is.EqualTo("Abagtha")); + Assert.That(string.Join(" ", terms[2].Glosses), Is.EqualTo("Abi")); } [Test] @@ -144,7 +144,7 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc public void TestStripParens(string testString, string expectedOutput, char left = '(', char right = ')') { Assert.That( - ParatextTermsParserBase.StripParens(testString, left: left, right: right), + ParatextProjectTermsParserBase.StripParens(testString, left: left, right: right), Is.EqualTo(expectedOutput) ); } @@ -159,7 +159,7 @@ public void TestStripParens(string testString, string expectedOutput, char left [TestCase("Ahasuerus, Xerxes; Assuerus", new string[] { "Ahasuerus", "Xerxes", "Assuerus" })] public void TestGetGlosses(string glossString, IReadOnlyList expectedOutput) { - Assert.That(ParatextTermsParserBase.GetGlosses(glossString), Is.EqualTo(expectedOutput)); + Assert.That(ParatextProjectTermsParserBase.GetGlosses(glossString), Is.EqualTo(expectedOutput)); } private class TestEnvironment( @@ -168,13 +168,14 @@ private class TestEnvironment( bool useTermGlosses = true ) { - public ParatextProjectTermsCorpus Corpus { get; } = - new ParatextProjectTermsCorpus( - files ?? new(), - settings ?? new DefaultParatextProjectSettings(), - new string[] { "PN" }, - useTermGlosses - ); + private readonly bool _useTermGlosses = useTermGlosses; + public ParatextProjectTermsParserBase Parser { get; } = + new MemoryParatextProjectTermsParser(settings ?? new DefaultParatextProjectSettings(), files ?? new()); + + public IEnumerable<(string TermId, IReadOnlyList Glosses)> GetGlosses() + { + return Parser.Parse(new string[] { "PN" }, _useTermGlosses); + } } private class DefaultParatextProjectSettings(