Skip to content

Commit

Permalink
Update keepEmptyParagraphs option to handle multimedia content in paragraphs
Browse files Browse the repository at this point in the history
  • Loading branch information
VadimKovalenkoSNF committed Jul 25, 2023
1 parent 34eb0bb commit a31c398
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 15 deletions.
21 changes: 9 additions & 12 deletions src/util/saveArticles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -941,19 +941,16 @@ export function applyOtherTreatments(parsoidDoc: DominoElement, dump: Dump) {
})

/* Remove empty paragraphs */
// TODO: This option should be applied to the page/html and/or page/mobile-html endpoints
if (!dump.opts.keepEmptyParagraphs) {
// Mobile view === details
// Desktop view === section
const sections: DominoElement[] = Array.from(parsoidDoc.querySelectorAll('details, section'))
for (const section of sections) {
if (
section.children.length ===
Array.from(section.children).filter((child: DominoElement) => {
return child.matches('summary')
}).length
) {
DU.deleteNode(section)
const paragraphs: DominoElement[] = Array.from(parsoidDoc.querySelectorAll('p'))
const mediaTags = ['img', 'video', 'audio', 'embed', 'object', 'iframe', 'canvas', 'svg', 'picture', 'track', 'source']
for (const paragraph of paragraphs) {
const hasNoMediaContent = !mediaTags.some((tag) => paragraph.querySelector(tag))
// First make sure the paragraph contains no media content
if (hasNoMediaContent) {
if (!paragraph.textContent || (paragraph.textContent && paragraph.textContent.trim().length === 0)) {
DU.deleteNode(paragraph)
}
}
}
}
Expand Down
3 changes: 0 additions & 3 deletions test/unit/saveArticles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,6 @@ describe('saveArticles', () => {
let dump2: Dump
let articleHtml: string

// TODO: This test will cause issues with integration tests of multimedia content
/*
beforeEach(async () => {
const classes = await setupScrapeClasses({ mwUrl: 'https://en.wikivoyage.org' }) // en wikivoyage
dump = classes.dump
Expand Down Expand Up @@ -96,7 +94,6 @@ describe('saveArticles', () => {
const paragraphs = Array.from(doc.querySelectorAll('p'))
expect(paragraphs.length).toEqual(4)
})
*/

/*
TODO: Investigate empty section behavior for other endpoints such as page/html and page/mobile-html
Expand Down

0 comments on commit a31c398

Please sign in to comment.