Skip to content

Commit

Permalink
Merge pull request #673 from kevinansfield/handle-nested-html-whitespace
Browse files Browse the repository at this point in the history
Fix error and extra leading/trailing whitespace when parsing indented HTML
  • Loading branch information
mixonic committed Mar 31, 2019
2 parents c8c3af8 + ca8c6c5 commit e54f3d6
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 4 deletions.
12 changes: 12 additions & 0 deletions src/js/parsers/dom.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ export function transformHTMLText(textContent) {
return text;
}

export function trimSectionText(section) {
if (section.isMarkerable && section.markers.length) {
let { head, tail } = section.markers;
head.value = head.value.replace(/^\s+/, '');
tail.value = tail.value.replace(/\s+$/, '');
}
}

function isGoogleDocsContainer(element) {
return !isTextNode(element) &&
!isCommentNode(element) &&
Expand Down Expand Up @@ -108,6 +116,10 @@ class DOMParser {
this.appendSections(post, sections);
});

// trim leading/trailing whitespace of markerable sections to avoid
// unnessary whitespace from indented HTML input
forEach(post.sections, section => trimSectionText(section));

return post;
}

Expand Down
18 changes: 14 additions & 4 deletions src/js/parsers/section.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ import {
} from 'mobiledoc-kit/utils/array-utils';

import {
transformHTMLText
transformHTMLText,
trimSectionText
} from '../parsers/dom';

import assert from '../utils/assert';
Expand Down Expand Up @@ -238,7 +239,7 @@ class SectionParser {
let { state } = this;

const markups = this._markupsFromElement(element);
if (markups.length && state.text.length) {
if (markups.length && state.text.length && state.section.isMarkerable) {
this._createMarker();
}
state.markups.push(...markups);
Expand All @@ -247,7 +248,7 @@ class SectionParser {
this.parseNode(node);
});

if (markups.length && state.text.length) {
if (markups.length && state.text.length && state.section.isMarkerable) {
// create the marker started for this node
this._createMarker();
}
Expand Down Expand Up @@ -277,14 +278,22 @@ class SectionParser {
}

// close a trailing text node if it exists
if (state.text.length) {
if (state.text.length && state.section.isMarkerable) {
this._createMarker();
}

// push listItems onto the listSection or add a new section
if (state.section.isListItem && lastSection && lastSection.isListSection) {
trimSectionText(state.section);
lastSection.items.append(state.section);
} else {
// avoid creating empty markup sections, especially useful for indented source
if (state.section.isMarkerable && !state.section.text.trim()) {
state.section = null;
state.text = '';
return;
}

// remove empty list sections before creating a new section
if (lastSection && lastSection.isListSection && lastSection.items.length === 0) {
sections.pop();
Expand All @@ -294,6 +303,7 @@ class SectionParser {
}

state.section = null;
state.text = '';
}

_markupsFromElement(element) {
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/parsers/dom-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,24 @@ test('singly-nested ol lis are parsed correctly', (assert) => {
assert.equal(section.items.objectAt(1).text, 'second element');
});

test('nested html doesn\'t create unneccessary whitespace', (assert) => {
let element = buildDOM(`
<div>
<p>
One
<p>
<p>
Two
</p>
</div>
`);
const post = parser.parse(element);

assert.equal(post.sections.length, 2, '2 sections');
assert.equal(post.sections.objectAt(0).text, 'One');
assert.equal(post.sections.objectAt(1).text, 'Two');
});

/*
* FIXME: Google docs nests uls like this
test('lis in nested uls are flattened (when ul is child of ul)', (assert) => {
Expand Down
43 changes: 43 additions & 0 deletions tests/unit/parsers/section-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,49 @@ test('#parse handles list following node handled by parserPlugin', (assert) => {
assert.equal(listSection.items.length, 1, '1 list item');
});

test('#parse handles insignificant whitespace', (assert) => {
let container = buildDOM(`
<ul>
<li>
One
</li>
</ul>
`);

let element = container.firstChild;
parser = new SectionParser(builder);
let sections = parser.parse(element);

assert.equal(sections.length, 1, '1 section');
let [list] = sections;
assert.equal(list.type, 'list-section');
assert.equal(list.items.length, 1, '1 list item');
assert.equal(list.items.objectAt(0).text, 'One');
});

test('#parse handles insignificant whitespace (wrapped)', (assert) => {
let container = buildDOM(`
<div>
<ul>
<li>
One
</li>
</ul>
</div>
`);

let element = container.firstChild;
parser = new SectionParser(builder);
let sections = parser.parse(element);

assert.equal(sections.length, 1, '1 section');
let [list] = sections;
assert.equal(list.type, 'list-section');
assert.equal(list.items.length, 1, '1 list item');
assert.equal(list.items.objectAt(0).text, 'One');
});


test('#parse avoids empty paragraph around wrapped list', (assert) => {
let container = buildDOM(`
<div><ul><li>One</li></ul></div>
Expand Down

0 comments on commit e54f3d6

Please sign in to comment.