diff --git a/node-usfm-parser/src/usfmGenerator.js b/node-usfm-parser/src/usfmGenerator.js index 93191aa8..3a866595 100644 --- a/node-usfm-parser/src/usfmGenerator.js +++ b/node-usfm-parser/src/usfmGenerator.js @@ -14,7 +14,6 @@ class USFMGenerator { this.usfmString += "+"; } this.usfmString += `${usjObj.marker} `; - } ["code", "number", "caller"].forEach((key) => { if (usjObj[key]) { @@ -38,9 +37,6 @@ class USFMGenerator { this.usfmString += `\\vp ${usjObj.pubnumber} \\vp* ` } } - if (usjObj.category) { - this.usfmString += `\\cat ${usjObj.category}\\cat*\n`; - } if (Array.isArray(usjObj.content)) { usjObj.content.forEach((item) => { if (typeof item === "string") { diff --git a/web-usfm-parser/package.json b/web-usfm-parser/package.json index 2e61255f..7dbef9ec 100644 --- a/web-usfm-parser/package.json +++ b/web-usfm-parser/package.json @@ -11,7 +11,8 @@ }, "source": "src/index.js", "scripts": { - "build": "parcel build src/index.js" + "build": "parcel build src/index.js", + "test": "mocha --parallel --timeout 40000" }, "files": [ "dist/", @@ -40,6 +41,9 @@ "parcel": "latest", "path-browserify": "^1.0.1", "process": "^0.11.10", - "web-tree-sitter": "^0.22.6" + "web-tree-sitter": "^0.22.6", + "glob": "^11.0.0", + "mocha": "^10.7.3", + "xml2js": "^0.6.2" } } diff --git a/web-usfm-parser/src/usfmGenerator.js b/web-usfm-parser/src/usfmGenerator.js index 0b50d142..b91750c3 100644 --- a/web-usfm-parser/src/usfmGenerator.js +++ b/web-usfm-parser/src/usfmGenerator.js @@ -5,7 +5,9 @@ class USFMGenerator { } usjToUsfm(usjObj, nested = false) { - + if (usjObj.type === "ref") { + usjObj.marker = "ref"; + } if (!NO_USFM_USJ_TYPES.includes(usjObj.type)) { this.usfmString += "\\"; if (nested && usjObj.type === "char") { @@ -21,6 +23,20 @@ class USFMGenerator { if (usjObj.category) { this.usfmString += `\\cat ${usjObj.category}\\cat*\n`; } + if (usjObj.altnumber) { + if (usjObj.marker === "c") { + this.usfmString += `\\ca ${usjObj.altnumber} \\ca*\n` + }else if (usjObj.marker === "v") { + this.usfmString += `\\va ${usjObj.altnumber} \\va* ` + } + } + if (usjObj.pubnumber) { + if (usjObj.marker === "c") { + this.usfmString += `\\cp ${usjObj.pubnumber}\n` + }else if (usjObj.marker === "v") { + this.usfmString += `\\vp ${usjObj.pubnumber} \\vp* ` + } + } if (Array.isArray(usjObj.content)) { usjObj.content.forEach((item) => { if (typeof item === "string") { diff --git a/web-usfm-parser/src/usjGenerator.js b/web-usfm-parser/src/usjGenerator.js index 55f369ab..980cc5a0 100644 --- a/web-usfm-parser/src/usjGenerator.js +++ b/web-usfm-parser/src/usjGenerator.js @@ -8,7 +8,7 @@ class USJGenerator { this.usfm = usfmString; this.jsonRootObj = usjRootObj || { type: "USJ", - version: "0.3.0", + version: "3.1", content: [], }; } @@ -89,15 +89,15 @@ class USJGenerator { sid: chapRef, }; - chapCap.forEach((tuple) => { - if (tuple[1] === "alt-num") { + chapCap.forEach((cap) => { + if (cap.name === "alt-num") { chapJsonObj.altnumber = this.usfm - .substring(tuple[0].startIndex, tuple[0].endIndex) + .substring(cap.node.startIndex, cap.node.endIndex) .trim(); } - if (tuple[1] === "pub-num") { + if (cap.name === "pub-num") { chapJsonObj.pubnumber = this.usfm - .substring(tuple[0].startIndex, tuple[0].endIndex) + .substring(cap.node.startIndex, cap.node.endIndex) .trim(); } }); @@ -206,7 +206,7 @@ class USJGenerator { const paraMarker = paraTagCap.node.type; if (paraMarker === "b") { - this.nodeToUSJSpecial(paraTagCap, parentJsonObj); + parentJsonObj.content.push( { type: "para", marker: paraMarker} ); } else if (!paraMarker.endsWith("Block")) { const paraJsonObj = { type: "para", marker: paraMarker, content: [] }; paraTagCap.node.children.forEach((child) => { @@ -394,7 +394,7 @@ class USJGenerator { .query("((category) @category)") .captures(node)[0]; const category = this.usfm - .substring(catCap[0].startIndex, catCap[0].endIndex) + .substring(catCap.node.startIndex, catCap.node.endIndex) .trim(); parentJsonObj.category = category; } else if (node.type === "fig") { @@ -403,17 +403,12 @@ class USJGenerator { this.nodeToUSJ(child, figJsonObj); }); parentJsonObj.content.push(figJsonObj); - } else if (node.type === "b") { - const bJsonObj = { type: "optbreak", marker: "b" }; - parentJsonObj.content.push(bJsonObj); - } else if (node.type === "usfm") { - const verJsonObj = { type: "para", marker: "usfm", content: [] }; - const version = this.usfm - .substring(node.startIndex, node.endIndex) - .replace("\\usfm", "") - .trim(); - verJsonObj.content.push(version); - parentJsonObj.content.push(verJsonObj); + } else if (node.type === "ref") { + const refJsonObj = { type: "ref", content: [] }; + node.children.slice(1, -1).forEach((child) => { + this.nodeToUSJ(child, refJsonObj); + }); + parentJsonObj.content.push(refJsonObj); } } nodeToUSJGeneric(node, parentJsonObj) { @@ -496,9 +491,10 @@ class USJGenerator { this.nodeToUSJPara(node, parentJsonObj); break; case "text": - const textVal = this.usfm + let textVal = this.usfm .substring(node.startIndex, node.endIndex) .trim(); + textVal = textVal.replace("~", " ") if (textVal !== "") { parentJsonObj.content.push(textVal); } @@ -514,17 +510,24 @@ class USJGenerator { case "esb": case "cat": case "fig": - case "usfm": + case "ref": this.nodeToUSJSpecial(node, parentJsonObj); break; + case "usfm": + break; default: - if ( + if (NOTE_MARKERS.includes(node.type)) { + this.nodeToUSJNotes(node, parentJsonObj) + } + else if ( CHAR_STYLE_MARKERS.includes(node.type) || NESTED_CHAR_STYLE_MARKERS.includes(node.type) || ["xt_standalone"].includes(node.type) ) { this.nodeToUSJChar(node, parentJsonObj); - } else if (node.type.endsWith("Attribute")) { + } else if (TABLE_CELL_MARKERS.includes(node.type)) { + this.nodeToUSJTable(node, parentJsonObj) + }else if (node.type.endsWith("Attribute")) { this.nodeToUSJAttrib(node, parentJsonObj); } else if ( PARA_STYLE_MARKERS.includes(node.type) || diff --git a/web-usfm-parser/src/utils/markers.js b/web-usfm-parser/src/utils/markers.js index 324811e8..b98012d2 100644 --- a/web-usfm-parser/src/utils/markers.js +++ b/web-usfm-parser/src/utils/markers.js @@ -112,9 +112,13 @@ export const NESTED_CHAR_STYLE_MARKERS = CHAR_STYLE_MARKERS.map( export const DEFAULT_ATTRIB_MAP = { w: "lemma", rb: "gloss", - xt: "link-href", + xt: "href", fig: "alt", - xt_standalone: "link-href", + xt_standalone: "href", + xtNested: "href", + ref: "loc", + "milestone": "who", + "k":"key" }; export const TABLE_CELL_MARKERS = ["tc", "th", "tcr", "thr"]; export const MISC_MARKERS = ["fig", "cat", "esb", "b", "ph", "pi"]; \ No newline at end of file diff --git a/web-usfm-parser/test/basic.js b/web-usfm-parser/test/basic.js new file mode 100644 index 00000000..aec2d9be --- /dev/null +++ b/web-usfm-parser/test/basic.js @@ -0,0 +1,110 @@ +// const assert = require('assert'); +// const {USFMParser} = require("../src/index"); +import assert from 'assert' +import {USFMParser} from '../src/index.js'; + +const simpleUSFM = '\\id GEN\n\\c 1\n\\p\n\\v 1 In the begining..\\v 2'; +const simpleUSJ = { + type: 'USJ', + version: '0.3.0', + content: [ + { type: 'book', marker: 'id', code: 'GEN', content: [] }, + { type: 'chapter', marker: 'c', number: '1', sid: 'GEN 1' }, + { type: 'para', marker: 'p', content: [ + {type: 'verse', marker: 'v', number: 1 }, + "In the begining..", + {type: 'verse', marker: 'v', number: 2 } + ] } + ] +} +describe("Sanity Check for the testing pipeline", () => { + + it("Parse, toUSJ and back toUSFM", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + const usfmParser = new USFMParser(simpleUSFM); + const output = usfmParser.toUSJ() + assert.notStrictEqual(output, null, 'The result should not be null and no errors during conversion'); + + const usfm = usfmParser.usjToUsfm(output) + assert.notStrictEqual(usfm, null, 'The result should not be null and no errors during conversion'); + + + }); +}); + +describe("USFMParser Object initialization", () => { + + it("with USFM", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + const usfmParser = new USFMParser(simpleUSFM) + assert.strictEqual(usfmParser.usfm, simpleUSFM) + + }); + + it("with USJ", async () => { + const usfmParser = new USFMParser(null, simpleUSJ) + assert.strictEqual(usfmParser.usj, simpleUSJ) + + }); + + it("with nothing", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + let usfmParser = null; + try { + const usfmParser = new USFMParser() + + } catch(err) { + assert.strictEqual(err.message, "Missing input! Either USFM, USJ or USX is to be provided.") + } + assert.strictEqual(usfmParser, null); + }); + + it("with usfm and usj", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + let usfmParser = null; + try { + const usfmParser = new USFMParser(simpleUSFM, simpleUSJ) + + } catch(err) { + assert.strictEqual(err.message, `Found more than one input! +Only one of USFM, USJ or USX is supported in one object.` ) + } + assert.strictEqual(usfmParser, null); + }); + + it("with usj in place of USFM", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + let usfmParser = null; + try { + const usfmParser = new USFMParser(simpleUSJ) + + } catch(err) { + assert.strictEqual(err.message, "Invalid input for USFM. Expected a string.") + } + assert.strictEqual(usfmParser, null); + }); + + it("with usfm in place of USJ", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + let usfmParser = null; + try { + const usfmParser = new USFMParser(null, simpleUSJ) + + } catch(err) { + assert.strictEqual(err.message, "Invalid input for USJ. Expected an object.") + } + assert.strictEqual(usfmParser, null); + }); + + it("with usj as default", async () => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + let usfmParser = null; + try { + const usfmParser = new USFMParser(simpleUSJ) + + } catch(err) { + assert.strictEqual(err.message, "Invalid input for USFM. Expected a string.") + } + assert.strictEqual(usfmParser, null); + }); +}); diff --git a/web-usfm-parser/test/config.js b/web-usfm-parser/test/config.js new file mode 100644 index 00000000..417816ef --- /dev/null +++ b/web-usfm-parser/test/config.js @@ -0,0 +1,207 @@ +import {glob} from 'glob'; +import fs from 'node:fs'; +import xml2js from "xml2js"; +import {USFMParser} from "../src/index.js" + +let allUsfmFiles = []; +let negativeTests = [] + +const TEST_DIR = "../tests"; + +allUsfmFiles = allUsfmFiles.concat( glob.sync(TEST_DIR+'/*/*/origin.usfm')); +allUsfmFiles = allUsfmFiles.concat( glob.sync(TEST_DIR+'/*/*/*/origin.usfm')); +// console.log(allUsfmFiles) + + + +let passFailOverrideList = { + //linkhref without - + "/paratextTests/Usfm30Usage/origin.usfm": "fail", + + // custom attribute without x- + "/paratextTests/InvalidAttributes/origin.usfm": "fail", + "/paratextTests/InvalidFigureAttributesReported/origin.usfm": "fail", + + // link attributes used without hyphen + "/paratextTests/LinkAttributesAreValid/origin.usfm": "fail", + + // significant space missing after \p , \q, \m, \b + "/paratextTests/CustomAttributesAreValid/origin.usfm": "fail", + "/paratextTests/NestingInFootnote/origin.usfm": "fail", + "/specExamples/cross-ref/origin.usfm": "fail", + "/paratextTests/MarkersMissingSpace/origin.usfm": "fail", + "/paratextTests/NestingInCrossReferences/origin.usfm": "fail", + "/special-cases/empty-para/origin.usfm": "fail", + // "/special-cases/sp/origin.usfm": "fail", + "/specExamples/extended/sidebars/origin.usfm":"fail", + + // No. of columns in table not validated by usfm-grammar + "/paratextTests/MissingColumnInTable/origin.usfm": "pass", + + // WordlistMarkerMissingFromGlossaryCitationForms from paratext. Something to do with \k or \w + "/paratextTests/WordlistMarkerMissingFromGlossaryCitationForms/origin.usfm": "pass", + + "/usfmjsTests/ts/origin.usfm": "pass", // Committee thinks these should fail though + "/usfmjsTests/chunk_footnote/origin.usfm": "pass", // Committee thinks these should fail though + "/usfmjsTests/ts_2/origin.usfm": "pass", // Committee thinks these should fail though + "/special-cases/newline-attributes/origin.usfm": "pass", // Committee thinks these should fail though + "/special-cases/empty-attributes5/origin.usfm": "pass", // Committee thinks these should fail though + + // no content in ide, rem, toc1, ip etc + "/paratextTests/NoErrorsPartiallyEmptyBook/origin.usfm": "fail", + "/paratextTests/NoErrorsEmptyBook/origin.usfm": "fail", + "/usfmjsTests/57-TIT.greek/origin.usfm": "fail", + "/paratextTests/EmptyMarkers/origin.usfm": "fail", + + // no \p (usually after \s) + "/usfmjsTests/missing_verses/origin.usfm": "fail", // has \s5 + "/usfmjsTests/isa_verse_span/origin.usfm": "fail", // has \s5 + "/usfmjsTests/isa_footnote/origin.usfm": "fail", // has \s5 + "/usfmjsTests/tit_extra_space_after_chapter/origin.usfm": "fail", // has \s5 + "/usfmjsTests/1ch_verse_span/origin.usfm": "fail", // has \s5 + "/usfmjsTests/usfmIntroTest/origin.usfm": "fail", + "/usfmjsTests/out_of_sequence_verses/origin.usfm": "fail", + "/usfmjsTests/acts_1_milestone/origin.usfm": "fail", + "/usfmjsTests/luk_quotes/origin.usfm": "fail", + "/biblica/BlankLinesWithFigures/origin.usfm": "fail", //\fig used without \p, only \b + + //no space after \s5 + "/usfmjsTests/usfmBodyTestD/origin.usfm": "fail", + "/usfmjsTests/usfm-body-testF/origin.usfm": "fail", + "/usfmjsTests/psa_quotes/origin.usfm": "fail", + "/usfmjsTests/pro_footnote/origin.usfm": "fail", + "/usfmjsTests/pro_quotes/origin.usfm": "fail", + "/samples-from-wild/doo43-1/origin.usfm": "fail", + "/usfmjsTests/gn_headers/origin.usfm": "fail", + "/usfmjsTests/isa_inline_quotes/origin.usfm": "fail", + "/usfmjsTests/job_footnote/origin.usfm": "fail", + "/usfmjsTests/mat-4-6.whitespace/origin.usfm": "fail", + "/usfmjsTests/out_of_sequence_chapters/origin.usfm": "fail", + + "/biblica/PublishingVersesWithFormatting/origin.usfm": "fail", // \c without number + + "/special-cases/figure_with_quotes_in_desc/origin.usfm": "fail", // quote within quote + "/specExamples/poetry/origin.usfm": "fail", // \b not followed by a \p or \q + + "/paratextTests/InvalidRubyMarkup/origin.usfm": "fail", // contradicts /paratextTests/MissingRequiredAttributesReported + "/special-cases/empty-book/origin.usfm": "pass", // Just says only \id is not enough. Not clear what else is mandatory + "/usfmjsTests/f10_gen12-2_empty_word/origin.usfm": "pass", // Empty \w \w* is accepted by us as of now + //########## Need to be fixed ####################### + "/paratextTests/NoErrorsShort/origin.usfm": "pass", // \c is mandatory! + // "/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt + "/usfmjsTests/acts_8-37-ugnt-footnote/origin.usfm": "fail", // no clue why it fails + + "/advanced/periph/origin.usfm": "fail", // Peripharals not implemented + "/advanced/nesting1/origin.usfm": "fail", // We dont support char within char w/o +, yet + "/samples-from-wild/doo43-4/origin.usfm": "fail", // ior surronded by a () leaves a stray ) at the end. + +}; + + +let excludeUSJs = [ + `${TEST_DIR}/biblica/CrossRefWithPipe/origin.json`, //ref object introduced which is not in usfm + `${TEST_DIR}/special-cases/empty-attributes/origin.json`, //lemma not given correctly. Issue from USX + `${TEST_DIR}/specExamples/character/origin.json`,// lit element treated as a body paragraph enclosing a verse! Issue from USX + + ] + +const initialiseParser = async function (inputUsfmPath){ + `Open and parse the given file` + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + try { + const data = fs.readFileSync(inputUsfmPath, 'utf8'); + let testParser = new USFMParser(data); + if (testParser === null) { + throw Error(`Paring failed for ${inputUsfmPath}: ${data}`) + } + return testParser; + } catch (err) { + throw err; + } +} + +const checkValidUsfm = function (inputUsfmPath) { + `Checks the metadata.xml to see is the USFM is a valid one` + if (inputUsfmPath.replace(TEST_DIR, '') in passFailOverrideList){ + if (passFailOverrideList[inputUsfmPath.replace(TEST_DIR, '')] === "pass"){ + return true + } else if (passFailOverrideList[inputUsfmPath.replace(TEST_DIR, '')] === "fail") { + return false + } + } + let value = null; + let metaFilePath = inputUsfmPath.replace("origin.usfm", "metadata.xml") + let metadata = fs.readFileSync(metaFilePath, 'utf8') + + xml2js.parseString(metadata, (err, result) => { + if (err) { + console.error('Error parsing XML:', err); + return; + } + value = result['test-metadata']['validated'][0]; + }); + + if (value === "fail"){ + return false + } + else if (value === "pass") { + return true + } else { + throw Error(`Validation read as : ${value} for ${metaFilePath}`) + + } +} + +const findAllMarkers = function (usfmStr, keepId = false, keepNumber = true) { + // Regex pattern to find all markers in the USFM string + let allMarkersInInput = [...usfmStr.matchAll(/\\\+?(([A-Za-z]+)\d*(-[se])?)/g)]; + + // Processing based on `keepNumber` flag + if (keepNumber) { + allMarkersInInput = allMarkersInInput.map(match => match[1]); + } else { + allMarkersInInput = allMarkersInInput.map(match => match[1] + match[2]); + } + + // Remove duplicates + allMarkersInInput = [...new Set(allMarkersInInput)]; + + // Remove 'id' marker if `keepId` is false + if (!keepId) { + const idIndex = allMarkersInInput.indexOf('id'); + if (idIndex !== -1) allMarkersInInput.splice(idIndex, 1); + } + + // Handle 'esbe' and 'usfm' markers + const esbeIndex = allMarkersInInput.indexOf('esbe'); + if (esbeIndex !== -1) { + const esbIndex = allMarkersInInput.indexOf('esb'); + if (esbIndex === -1) throw new Error("'esb' must be present if 'esbe' is found"); + allMarkersInInput.splice(esbeIndex, 1); + } + + const usfmIndex = allMarkersInInput.indexOf('usfm'); + if (usfmIndex !== -1) { + allMarkersInInput.splice(usfmIndex, 1); + } + + return allMarkersInInput; +} + +let isValidUsfm = {} + +allUsfmFiles.forEach((filepath) => { + isValidUsfm[filepath] = checkValidUsfm(filepath) +}); +// console.log(allUsfmFiles[0]) + +// const test_parser = initialiseParser("../tests/samples-from-wild/WEB1/origin.usfm") + + +export{ + allUsfmFiles, + initialiseParser, + isValidUsfm, + excludeUSJs, + findAllMarkers +}; diff --git a/web-usfm-parser/test/test_parsing.js b/web-usfm-parser/test/test_parsing.js new file mode 100644 index 00000000..a00788a6 --- /dev/null +++ b/web-usfm-parser/test/test_parsing.js @@ -0,0 +1,23 @@ +import assert from 'assert'; +import {allUsfmFiles, initialiseParser, isValidUsfm} from './config.js'; +import {USFMParser} from '../src/index.js'; + +describe("Check parsing pass or fail is correct", () => { + + allUsfmFiles.forEach(function(value) { + it(`Parse ${value} to ensure validity ${isValidUsfm[value]}`, async (inputUsfmPath=value) => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + const testParser = await initialiseParser(inputUsfmPath) + assert(testParser instanceof USFMParser) + assert(testParser.errors instanceof Array) + if (isValidUsfm[inputUsfmPath] === true) { + assert.strictEqual(testParser.errors.length, 0); + } else { + assert.notStrictEqual(testParser.errors.length, 0); + } + + + }); + + }); +}); diff --git a/web-usfm-parser/test/test_usj_conversion.js b/web-usfm-parser/test/test_usj_conversion.js new file mode 100644 index 00000000..a0b781f5 --- /dev/null +++ b/web-usfm-parser/test/test_usj_conversion.js @@ -0,0 +1,131 @@ +import assert from 'assert'; +import fs from 'node:fs'; +import {allUsfmFiles, initialiseParser, isValidUsfm, excludeUSJs, findAllMarkers} from './config.js'; +import {USFMParser} from '../src/index.js'; + +describe("Check successful USFM-USJ conversion for positive samples", () => { + + allUsfmFiles.forEach(function(value) { + if (isValidUsfm[value]) { + it(`Convert ${value} to USJ`, async (inputUsfmPath=value) => { + const testParser = await initialiseParser(inputUsfmPath) + // assert(testParser instanceof USFMParser) + const usj = testParser.toUSJ(); + assert(testParser instanceof Object); + assert.strictEqual(usj["type"], "USJ"); + assert.strictEqual(usj["version"], "3.1"); + assert.strictEqual(usj.content[0].type, "book"); + assert.strictEqual(usj.content[0].marker, "id"); + }); + } + }); +}); + + +describe("Compare generated USJ with testsuite sample", () => { + + allUsfmFiles.forEach(function(value) { + const usjPath = value.replace(".usfm", ".json"); + if (isValidUsfm[value] && ! excludeUSJs.includes(usjPath)) { + it(`Compare generated USJ to ${usjPath}`, async (inputUsfmPath=value) => { + const testParser = await initialiseParser(inputUsfmPath) + const generatedUSJ = testParser.toUSJ(); + const filePath = usjPath; + let fileData = null; + try { + fileData = fs.readFileSync(filePath, "utf8"); + } catch(err) { + if (err.code === "ENOENT") { + return + } + } + const testsuiteUSJ = JSON.parse(fileData); + stripDefaultAttribValue(testsuiteUSJ) + removeNewlinesInText(testsuiteUSJ) + stripTextValue(testsuiteUSJ) + removeNewlinesInText(generatedUSJ) + stripTextValue(generatedUSJ) + + assert.deepEqual(generatedUSJ, testsuiteUSJ); + }); + } + }); +}); + + +describe("Test USFM-USJ-USFM roundtripping", () => { + allUsfmFiles.forEach(function(value) { + if (isValidUsfm[value]) { + it(`Roundtrip ${value} via USJ`, async (inputUsfmPath=value) => { + await USFMParser.init("./tree-sitter-usfm.wasm", "./tree-sitter.wasm"); + const testParser = await initialiseParser(inputUsfmPath) + assert(testParser instanceof USFMParser) + const usj = testParser.toUSJ(); + assert(usj instanceof Object); + + const testParser2 = new USFMParser(null, usj); + const generatedUSFM = testParser2.usfm; + assert.strictEqual(typeof generatedUSFM, 'string'); + assert(generatedUSFM.startsWith("\\id")); + + const inputMarkers = findAllMarkers(testParser.usfm) + const finalMarkers = findAllMarkers(generatedUSFM) + assert.deepStrictEqual(inputMarkers, finalMarkers, `Markers in input and generated USFMs differ`) + + + + }); + } + }); + +}); + + +function stripTextValue(usjObj) { + /* Trailing and preceding space handling can be different between tcdocs and our logic. + Strip both before comparison */ + if (usjObj.hasOwnProperty("content")) { + usjObj["content"].forEach((item, index) => { + if (typeof item === 'string') { + usjObj["content"][index] = item.trim(); // Strip spaces from strings + } else { + stripTextValue(item); // Recursively handle nested objects + } + }); + } +} + +function removeNewlinesInText(usjDict) { + /* The test samples in testsuite do not preserve new lines. But we do in usfm-grammar. + So removing them just for comparison */ + if (usjDict.hasOwnProperty("content")) { + usjDict["content"].forEach((item, index) => { + if (typeof item === 'string') { + // Replace newlines with spaces + usjDict["content"][index] = item.replace(/\n/g, " "); + // Replace multiple spaces with a single space + usjDict["content"][index] = usjDict["content"][index].replace(/\s+/g, " "); + } else { + removeNewlinesInText(item); // Recursively handle nested dictionaries + } + }); + } +} + + +function stripDefaultAttribValue(usjDict) { + /* The USX samples in test suite have space in lemma values when given as default attribute */ + if (usjDict.hasOwnProperty("content")) { + usjDict["content"].forEach(item => { + if (typeof item === 'object' && !Array.isArray(item)) { + if (item["type"] === "char" && item["marker"] === "w") { + if (item.hasOwnProperty("lemma")) { + item["lemma"] = item["lemma"].trim(); // Strip spaces from 'lemma' + } + } + stripDefaultAttribValue(item); // Recursively handle nested dictionaries + } + }); + } +} +