Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(fix) can extract with self-closing component before it #251

Merged
merged 2 commits into from
Jun 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/language-server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
"estree-walker": "^2.0.1",
"lodash": "^4.17.10",
"magic-string": "^0.25.3",
"parse5": "^5.1.0",
"prettier": "2.0.5",
"prettier-plugin-svelte": "1.1.0",
"source-map": "^0.7.3",
Expand Down
177 changes: 87 additions & 90 deletions packages/language-server/src/lib/documents/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { clamp, isInRange } from '../../utils';
import { clamp, isInRange, regexLastIndexOf } from '../../utils';
import { Position, Range } from 'vscode-languageserver';
import parse5, { Location } from 'parse5';
import { Node, getLanguageService } from 'vscode-html-languageservice';

export interface TagInformation {
content: string;
Expand All @@ -12,43 +12,44 @@ export interface TagInformation {
container: { start: number; end: number };
}

function parseAttributes(attrlist: { name: string; value: string }[]): Record<string, string> {
function parseAttributes(
rawAttrs: Record<string, string | null> | undefined,
): Record<string, string> {
const attrs: Record<string, string> = {};
attrlist.forEach((attr) => {
attrs[attr.name] = attr.value === '' ? attr.name : attr.value; // in order to support boolean attributes (see utils.test.ts)
if (!rawAttrs) {
return attrs;
}

Object.keys(rawAttrs).forEach((attrName) => {
const attrValue = rawAttrs[attrName];
attrs[attrName] = attrValue === null ? attrName : removeOuterQuotes(attrValue);
});
return attrs;
}

function isMatchingTag(source: string, node: ParsedNode, tag: string): boolean {
if (node.nodeName !== tag) {
return false;
/**
 * Strips one pair of matching outer quotes (`"…"` or `'…'`) from an attribute value.
 *
 * Only values of at least two characters can carry a quote pair, so a lone quote
 * character is returned unchanged instead of collapsing into an empty string.
 *
 * @param attrValue raw attribute value, possibly still wrapped in quotes
 * @returns the value without its surrounding quote pair, or the input unchanged
 */
function removeOuterQuotes(attrValue: string): string {
    if (attrValue.length < 2) {
        return attrValue;
    }

    const firstChar = attrValue.charAt(0);
    const startsWithQuote = firstChar === '"' || firstChar === "'";
    // The closing quote has to be the same kind of quote as the opening one.
    if (startsWithQuote && attrValue.endsWith(firstChar)) {
        return attrValue.slice(1, -1);
    }
    return attrValue;
}
}

// node name equals tag, but we still have to check for case sensitivity
const orgStart = node.sourceCodeLocation?.startTag.startOffset || 0;
const orgEnd = node.sourceCodeLocation?.startTag.endOffset || 0;
const tagHtml = source.substring(orgStart, orgEnd);
return tagHtml.startsWith(`<${tag}`);
const parser = getLanguageService();

/**
 * Runs the HTML language service parser over the given text.
 *
 * @param text the source text to parse
 * @returns whatever `parseHTMLDocument` produces for that text
 */
function parseHtml(text: string) {
    // Parsing only ever reads the document through `getText`, so a minimal
    // stand-in object exposing just that method is enough.
    const documentLike = { getText: () => text };
    return parser.parseHTMLDocument(documentLike as any);
}

// parse5's DefaultTreeNode type is insufficient; make our own type to make TS happy
type ParsedNode = {
nodeName: string;
tagName: string;
value?: string;
attrs: { name: string; value: string }[];
childNodes: ParsedNode[];
parentNode: ParsedNode;
sourceCodeLocation: Location & { startTag: Location; endTag: Location };
};

const regexIf = new RegExp('{#if\\s(.*?)*}', 'igms');
const regexIf = new RegExp('{#if\\s.*?}', 'igms');
const regexIfEnd = new RegExp('{/if}', 'igms');
const regexEach = new RegExp('{#each\\s(.*?)*}', 'igms');
const regexEach = new RegExp('{#each\\s.*?}', 'igms');
const regexEachEnd = new RegExp('{/each}', 'igms');
const regexAwait = new RegExp('{#await\\s(.*?)*}', 'igms');
const regexAwait = new RegExp('{#await\\s.*?}', 'igms');
const regexAwaitEnd = new RegExp('{/await}', 'igms');
// Matches the opening of a `{@html ` tag: the literal `{@html` followed by one
// whitespace character. No trailing `.*?` is needed, because a lazy quantifier at
// the very end of a pattern always matches the empty string.
const regexHtml = new RegExp('{@html\\s', 'igms');

/**
* Extracts a tag (style or script) from the given text
Expand All @@ -57,76 +58,72 @@ const regexAwaitEnd = new RegExp('{/await}', 'igms');
* @param source text content to extract tag from
* @param tag the tag to extract
*/
function extractTags(source: string, tag: 'script' | 'style'): TagInformation[] {
const { childNodes } = parse5.parseFragment(source, {
sourceCodeLocationInfo: true,
}) as { childNodes: ParsedNode[] };

const matchedNodes: ParsedNode[] = [];
let currentSvelteDirective;
for (const node of childNodes) {
/**
* skip matching tags if we are inside a directive
*
* extractTag's goal is solely to identify the top level <script> or <style>.
*
* therefore only iterating through top level childNodes is a feature we want!
*
* however, we cannot do a naive childNodes.find() because context matters.
* if we have a <script> tag inside an {#if}, we want to skip that until the {/if}.
* if we have a <script> tag inside an {#each}, we want to skip that until the {/each}.
* if we have a <script> tag inside an {#await}, we want to skip that until the {/await}.
*
* and so on. So we use a tiny inSvelteDirective 'state machine' to track this
* and use regex to detect the svelte directives.
* We might need to improve this regex in future.
*/
if (currentSvelteDirective) {
if (node.value && node.nodeName === '#text') {
if (
(currentSvelteDirective === 'if' && regexIfEnd.exec(node.value)) ||
(currentSvelteDirective === 'each' && regexEachEnd.exec(node.value)) ||
(currentSvelteDirective === 'await' && regexAwaitEnd.exec(node.value))
) {
currentSvelteDirective = undefined;
}
}
} else {
if (node.value && node.nodeName === '#text') {
// potentially a svelte directive
if (regexIf.exec(node.value)) currentSvelteDirective = 'if';
else if (regexEach.exec(node.value)) currentSvelteDirective = 'each';
else if (regexAwait.exec(node.value)) currentSvelteDirective = 'await';
} else if (isMatchingTag(source, node, tag)) {
matchedNodes.push(node);
}
}
function extractTags(text: string, tag: 'script' | 'style'): TagInformation[] {
const rootNodes = parseHtml(text).roots;
const matchedNodes = rootNodes
.filter((node) => node.tag === tag)
.filter((tag) => {
return isNotInsideControlFlowTag(tag) && isNotInsideHtmlTag(tag);
});
return matchedNodes.map(transformToTagInfo);

/**
 * Checks that the tag is not wrapped in an `{#if}`, `{#each}` or `{#await}` block.
 *
 * Only the root-level text following the tag is inspected (the gaps after the tag
 * and after each later root node). For each block kind the first closing `{/X}` is
 * compared with the first opening `{#X ...}`: a closing marker that comes first
 * means the matching opening marker lies before the tag, i.e. the tag is inside.
 */
function isNotInsideControlFlowTag(tag: Node) {
    const tagAndFollowing = rootNodes.slice(rootNodes.indexOf(tag));

    let rootContentAfterTag = '';
    tagAndFollowing.forEach((current, position) => {
        // The gap after the last node has no successor and runs to the end of the text.
        const next = tagAndFollowing[position + 1];
        rootContentAfterTag += text.substring(current.end, next?.start);
    });

    const blockPairs = [
        [regexIf, regexIfEnd],
        [regexEach, regexEachEnd],
        [regexAwait, regexAwaitEnd],
    ];
    for (const [opening, closing] of blockPairs) {
        // The regexes are global and shared, so rewind them before every search.
        opening.lastIndex = 0;
        closing.lastIndex = 0;
        const openingMatch = opening.exec(rootContentAfterTag);
        const closingMatch = closing.exec(rootContentAfterTag);
        const openingIndex = openingMatch?.index ?? text.length;
        const closingIndex = closingMatch?.index ?? text.length;
        if (closingIndex < openingIndex) {
            return false;
        }
    }
    return true;
}

return matchedNodes.map(transformToTagInfo);
/**
 * Checks that the tag is not placed inside a `{@html ...}` moustache tag.
 *
 * Collects the root-level text in front of the tag (the gaps between the preceding
 * root nodes, ending where the tag itself starts) and compares the last `{@html`
 * occurrence with the last `}`. A `{@html` located after the last `}` has not been
 * closed yet, so the tag is inside it.
 *
 * NOTE(review): assumes `regexLastIndexOf` returns the index of the last match and a
 * negative value when there is none — the helper is defined elsewhere; confirm.
 */
function isNotInsideHtmlTag(tag: Node) {
    const nodes = rootNodes.slice(0, rootNodes.indexOf(tag));
    const rootContentBeforeTag = [{ start: 0, end: 0 }, ...nodes]
        .map((node, idx) => {
            // `nodes[idx]` is the root node following `node`. After the last preceding
            // node there is none, so the gap must stop at the tag; otherwise it would
            // run to the end of the text and pick up `}` characters behind the tag.
            return text.substring(node.end, nodes[idx]?.start ?? tag.start);
        })
        .join('');

    return !(
        regexLastIndexOf(rootContentBeforeTag, regexHtml) >
        rootContentBeforeTag.lastIndexOf('}')
    );
}

function transformToTagInfo(matchedNode: ParsedNode) {
const SCL = matchedNode.sourceCodeLocation; // shorthand
const attributes = parseAttributes(matchedNode.attrs);
/**
* Note: `content` will only show top level child node content.
* This is ok given that extractTag is only meant to extract top level
* <style> and <script> tags. But if that ever changes we may have to make this
* recurse and concat all childnodes.
*/
const content = matchedNode.childNodes[0]?.value || '';
const start = SCL.startTag.endOffset;
const end = SCL.endTag.startOffset;
const startPos = positionAt(start, source);
const endPos = positionAt(end, source);
function transformToTagInfo(matchedNode: Node) {
const start = matchedNode.startTagEnd ?? matchedNode.start;
const end = matchedNode.endTagStart ?? matchedNode.end;
const startPos = positionAt(start, text);
const endPos = positionAt(end, text);
const container = {
start: SCL.startTag.startOffset,
end: SCL.endTag.endOffset,
start: matchedNode.start,
end: matchedNode.end,
};
const content = text.substring(start, end);

return {
content,
attributes,
attributes: parseAttributes(matchedNode.attributes),
start,
end,
startPos,
Expand Down
40 changes: 31 additions & 9 deletions packages/language-server/test/lib/documents/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,28 @@ describe('document/utils', () => {
assert.deepStrictEqual(attributes, { type: 'typescript' });
});

it('can extract with self-closing component before it', () => {
const extracted = extractStyleTag('<SelfClosing /><style></style>');
assert.deepStrictEqual(extracted, {
start: 22,
end: 22,
startPos: {
character: 22,
line: 0,
},
endPos: {
character: 22,
line: 0,
},
attributes: {},
content: '',
container: {
end: 30,
start: 15,
},
});
});

it('extracts style tag', () => {
const text = `
<p>bla</p>
Expand Down Expand Up @@ -118,30 +140,30 @@ describe('document/utils', () => {
const text = `
{#if name}
<script>
console.log('not top level')
console.log('if not top level')
</script>
{/if}
<ul>
{#each cats as cat}
<script>
console.log('not top level')
console.log('each not top level')
</script>
{/each}
</ul>
{#await promise}
<script>
console.log('not top level')
console.log('await not top level')
</script>
{:then number}
<script>
console.log('not top level')
console.log('then not top level')
</script>
{:catch error}
<script>
console.log('not top level')
console.log('catch not top level')
</script>
{/await}
<p>{@html <script> consolelog('not top level')</script>}</p>
<p>{@html <script> console.log('html not top level')</script>}</p>
{@html mycontent}
{@debug myvar}
<!-- p{ color: blue; }</script> -->
Expand All @@ -156,11 +178,11 @@ describe('document/utils', () => {
assert.deepStrictEqual(extractScriptTags(text)?.script, {
content: 'top level script',
attributes: {},
start: 1212,
end: 1228,
start: 1243,
end: 1259,
startPos: Position.create(34, 24),
endPos: Position.create(34, 40),
container: { start: 1204, end: 1237 },
container: { start: 1235, end: 1268 },
});
});

Expand Down