Skip to content

Commit

Permalink
Merge pull request #1011 from thephpleague/commonmark-spec-0.31
Browse files Browse the repository at this point in the history
CommonMark spec 0.31 compatibility
  • Loading branch information
colinodell authored Jul 22, 2024
2 parents 50bd4dc + a0cd1cd commit b81cc1d
Show file tree
Hide file tree
Showing 15 changed files with 120 additions and 74 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,23 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi

## [Unreleased][unreleased]

### Changed

- Made compatible with CommonMark spec 0.31.0, including:
- Allow closing fence to be followed by tabs
- Remove restrictive limitation on inline comments
- Unicode symbols now treated like punctuation (for purposes of flankingness)
- Trailing tabs on the last line of indented code blocks will be excluded
- Improved HTML comment matching
- `Paragraph`s only containing link reference definitions will be kept in the AST until the `Document` is finalized
- (These were previously removed immediately after parsing the `Paragraph`)

### Fixed

- Fixed list tightness not being determined properly in some edge cases
- Fixed incorrect ending line numbers for several block types in various scenarios
- Fixed lowercase inline HTML declarations not being accepted

## [2.4.4] - 2024-07-22

### Fixed
Expand Down
12 changes: 6 additions & 6 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
"require-dev": {
"ext-json": "*",
"cebe/markdown": "^1.0",
"commonmark/cmark": "0.30.3",
"commonmark/commonmark.js": "0.30.0",
"commonmark/cmark": "0.31.0",
"commonmark/commonmark.js": "0.31.0",
"composer/package-versions-deprecated": "^1.8",
"embed/embed": "^4.4",
"erusev/parsedown": "^1.0",
Expand All @@ -56,9 +56,9 @@
"type": "package",
"package": {
"name": "commonmark/commonmark.js",
"version": "0.30.0",
"version": "0.31.0",
"dist": {
"url": "https://github.com/commonmark/commonmark.js/archive/0.30.0.zip",
"url": "https://github.com/commonmark/commonmark.js/archive/0.31.0.zip",
"type": "zip"
}
}
Expand All @@ -67,9 +67,9 @@
"type": "package",
"package": {
"name": "commonmark/cmark",
"version": "0.30.3",
"version": "0.31.0",
"dist": {
"url": "https://github.com/commonmark/cmark/archive/0.30.3.zip",
"url": "https://github.com/commonmark/cmark/archive/0.31.0.zip",
"type": "zip"
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Extension/CommonMark/Node/Block/ListBlock.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ListBlock extends AbstractBlock implements TightBlockInterface
public const DELIM_PERIOD = 'period';
public const DELIM_PAREN = 'paren';

protected bool $tight = false;
protected bool $tight = false; // TODO Make lists tight by default in v3

/** @psalm-readonly */
protected ListData $listData;
Expand Down
2 changes: 1 addition & 1 deletion src/Extension/CommonMark/Parser/Block/FencedCodeParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public function tryContinue(Cursor $cursor, BlockContinueParserInterface $active
{
// Check for closing code fence
if (! $cursor->isIndented() && $cursor->getNextNonSpaceCharacter() === $this->block->getChar()) {
$match = RegexHelper::matchFirst('/^(?:`{3,}|~{3,})(?= *$)/', $cursor->getLine(), $cursor->getNextNonSpacePosition());
$match = RegexHelper::matchFirst('/^(?:`{3,}|~{3,})(?=[ \t]*$)/', $cursor->getLine(), $cursor->getNextNonSpacePosition());
if ($match !== null && \strlen($match[0]) >= $this->block->getLength()) {
// closing fence - we're at end of line, so we can finalize now
return BlockContinue::finished();
Expand Down
19 changes: 6 additions & 13 deletions src/Extension/CommonMark/Parser/Block/IndentedCodeParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,14 @@ public function addLine(string $line): void

public function closeBlock(): void
{
$reversed = \array_reverse($this->strings->toArray(), true);
foreach ($reversed as $index => $line) {
if ($line !== '' && $line !== "\n" && ! \preg_match('/^(\n *)$/', $line)) {
break;
}
$lines = $this->strings->toArray();

unset($reversed[$index]);
// Note that indented code block cannot be empty, so $lines will always have at least one non-empty element
while (\preg_match('/^[ \t]*$/', \end($lines))) { // @phpstan-ignore-line
\array_pop($lines);
}

$fixed = \array_reverse($reversed);
$tmp = \implode("\n", $fixed);
if (\substr($tmp, -1) !== "\n") {
$tmp .= "\n";
}

$this->block->setLiteral($tmp);
$this->block->setLiteral(\implode("\n", $lines) . "\n");
$this->block->setEndLine($this->block->getStartLine() + \count($lines) - 1);
}
}
62 changes: 38 additions & 24 deletions src/Extension/CommonMark/Parser/Block/ListBlockParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ final class ListBlockParser extends AbstractBlockContinueParser
/** @psalm-readonly */
private ListBlock $block;

private bool $hadBlankLine = false;

private int $linesAfterBlank = 0;

public function __construct(ListData $listData)
{
$this->block = new ListBlock($listData);
Expand All @@ -48,32 +44,50 @@ public function isContainer(): bool

public function canContain(AbstractBlock $childBlock): bool
{
if (! $childBlock instanceof ListItem) {
return false;
}

// Another list item is being added to this list block.
// If the previous line was blank, that means this list
// block is "loose" (not tight).
if ($this->hadBlankLine && $this->linesAfterBlank === 1) {
$this->block->setTight(false);
$this->hadBlankLine = false;
}

return true;
return $childBlock instanceof ListItem;
}

public function tryContinue(Cursor $cursor, BlockContinueParserInterface $activeBlockParser): ?BlockContinue
{
if ($cursor->isBlank()) {
$this->hadBlankLine = true;
$this->linesAfterBlank = 0;
} elseif ($this->hadBlankLine) {
$this->linesAfterBlank++;
}

// List blocks themselves don't have any markers, only list items. So try to stay in the list.
// If there is a block start other than list item, canContain makes sure that this list is closed.
return BlockContinue::at($cursor);
}

public function closeBlock(): void
{
$item = $this->block->firstChild();
while ($item instanceof AbstractBlock) {
// check for non-final list item ending with blank line:
if ($item->next() !== null && self::endsWithBlankLine($item)) {
$this->block->setTight(false);
break;
}

// recurse into children of list item, to see if there are spaces between any of them
$subitem = $item->firstChild();
while ($subitem instanceof AbstractBlock) {
if ($subitem->next() && self::endsWithBlankLine($subitem)) {
$this->block->setTight(false);
break 2;
}

$subitem = $subitem->next();
}

$item = $item->next();
}

$lastChild = $this->block->lastChild();
if ($lastChild instanceof AbstractBlock) {
$this->block->setEndLine($lastChild->getEndLine());
}
}

private static function endsWithBlankLine(AbstractBlock $block): bool
{
$next = $block->next();

return $next instanceof AbstractBlock && $block->getEndLine() !== $next->getStartLine() - 1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserSta
if (! ($matched instanceof ListBlockParser) || ! $listData->equals($matched->getBlock()->getListData())) {
$listBlockParser = new ListBlockParser($listData);
// We start out with assuming a list is tight. If we find a blank line, we set it to loose later.
// TODO for 3.0: Just make them tight by default in the block so we can remove this call
$listBlockParser->getBlock()->setTight(true);

return BlockStart::of($listBlockParser, $listItemParser)->at($cursor);
Expand Down
30 changes: 11 additions & 19 deletions src/Extension/CommonMark/Parser/Block/ListItemParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@

namespace League\CommonMark\Extension\CommonMark\Parser\Block;

use League\CommonMark\Extension\CommonMark\Node\Block\ListBlock;
use League\CommonMark\Extension\CommonMark\Node\Block\ListData;
use League\CommonMark\Extension\CommonMark\Node\Block\ListItem;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Node\Block\Paragraph;
use League\CommonMark\Parser\Block\AbstractBlockContinueParser;
use League\CommonMark\Parser\Block\BlockContinue;
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
Expand All @@ -28,8 +26,6 @@ final class ListItemParser extends AbstractBlockContinueParser
/** @psalm-readonly */
private ListItem $block;

private bool $hadBlankLine = false;

public function __construct(ListData $listData)
{
$this->block = new ListItem($listData);
Expand All @@ -47,18 +43,7 @@ public function isContainer(): bool

public function canContain(AbstractBlock $childBlock): bool
{
if ($this->hadBlankLine) {
// We saw a blank line in this list item, that means the list block is loose.
//
// spec: if any of its constituent list items directly contain two block-level elements with a blank line
// between them
$parent = $this->block->parent();
if ($parent instanceof ListBlock) {
$parent->setTight(false);
}
}

return true;
return ! $childBlock instanceof ListItem;
}

public function tryContinue(Cursor $cursor, BlockContinueParserInterface $activeBlockParser): ?BlockContinue
Expand All @@ -69,9 +54,6 @@ public function tryContinue(Cursor $cursor, BlockContinueParserInterface $active
return BlockContinue::none();
}

$activeBlock = $activeBlockParser->getBlock();
// If the active block is a code block, blank lines in it should not affect if the list is tight.
$this->hadBlankLine = $activeBlock instanceof Paragraph || $activeBlock instanceof ListItem;
$cursor->advanceToNextNonSpaceOrTab();

return BlockContinue::at($cursor);
Expand All @@ -87,4 +69,14 @@ public function tryContinue(Cursor $cursor, BlockContinueParserInterface $active
// Note: We'll hit this case for lazy continuation lines, they will get added later.
return BlockContinue::none();
}

public function closeBlock(): void
{
if (($lastChild = $this->block->lastChild()) instanceof AbstractBlock) {
$this->block->setEndLine($lastChild->getEndLine());
} else {
// Empty list item
$this->block->setEndLine($this->block->getStartLine());
}
}
}
2 changes: 2 additions & 0 deletions src/Node/Block/Paragraph.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@

class Paragraph extends AbstractBlock
{
/** @internal */
public bool $onlyContainsLinkReferenceDefinitions = false;
}
27 changes: 27 additions & 0 deletions src/Parser/Block/DocumentBlockParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Node\Block\Document;
use League\CommonMark\Node\Block\Paragraph;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Reference\ReferenceMapInterface;

Expand Down Expand Up @@ -50,4 +51,30 @@ public function tryContinue(Cursor $cursor, BlockContinueParserInterface $active
{
return BlockContinue::at($cursor);
}

public function closeBlock(): void
{
$this->removeLinkReferenceDefinitions();
}

private function removeLinkReferenceDefinitions(): void
{
$emptyNodes = [];

$walker = $this->document->walker();
while ($event = $walker->next()) {
$node = $event->getNode();
// TODO for v3: It would be great if we could find an alternate way to identify such paragraphs.
// Unfortunately, we can't simply check for empty paragraphs here because inlines haven't been processed yet,
// meaning all paragraphs will appear blank here, and we don't have a way to check the status of the reference parser
// which is attached to the (already-closed) paragraph parser.
if ($event->isEntering() && $node instanceof Paragraph && $node->onlyContainsLinkReferenceDefinitions) {
$emptyNodes[] = $node;
}
}

foreach ($emptyNodes as $node) {
$node->detach();
}
}
}
4 changes: 1 addition & 3 deletions src/Parser/Block/ParagraphParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ public function addLine(string $line): void

public function closeBlock(): void
{
if ($this->referenceParser->hasReferences() && $this->referenceParser->getParagraphContent() === '') {
$this->block->detach();
}
$this->block->onlyContainsLinkReferenceDefinitions = $this->referenceParser->hasReferences() && $this->referenceParser->getParagraphContent() === '';
}

public function parseInlines(InlineParserEngineInterface $inlineParser): void
Expand Down
2 changes: 1 addition & 1 deletion src/Parser/MarkdownParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ private function parseLine(string $line): void
} else {
// finalize any blocks not matched
if ($unmatchedBlocks > 0) {
$this->closeBlockParsers($unmatchedBlocks, $this->lineNumber);
$this->closeBlockParsers($unmatchedBlocks, $this->lineNumber - 1);
}

if (! $blockParser->isContainer()) {
Expand Down
6 changes: 3 additions & 3 deletions src/Util/RegexHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ final class RegexHelper
public const PARTIAL_CLOSETAG = '<\/' . self::PARTIAL_TAGNAME . '\s*[>]';
public const PARTIAL_OPENBLOCKTAG = '<' . self::PARTIAL_BLOCKTAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>';
public const PARTIAL_CLOSEBLOCKTAG = '<\/' . self::PARTIAL_BLOCKTAGNAME . '\s*[>]';
public const PARTIAL_HTMLCOMMENT = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->';
public const PARTIAL_HTMLCOMMENT = '<!-->|<!--->|<!--[\s\S]*?-->';
public const PARTIAL_PROCESSINGINSTRUCTION = '[<][?][\s\S]*?[?][>]';
public const PARTIAL_DECLARATION = '<![A-Z]+' . '[^>]*>';
public const PARTIAL_DECLARATION = '<![A-Za-z]+' . '[^>]*>';
public const PARTIAL_CDATA = '<!\[CDATA\[[\s\S]*?]\]>';
public const PARTIAL_HTMLTAG = '(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . '|' . self::PARTIAL_HTMLCOMMENT . '|' .
self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')';
Expand All @@ -65,7 +65,7 @@ final class RegexHelper
'|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'' .
'|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*\))';

public const REGEX_PUNCTUATION = '/^[\x{2000}-\x{206F}\x{2E00}-\x{2E7F}\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\\\\\'!"#\$%&\(\)\*\+,\-\.\\/:;<=>\?@\[\]\^_`\{\|\}~]/u';
public const REGEX_PUNCTUATION = '/^[!"#$%&\'()*+,\-.\\/:;<=>?@\\[\\]\\\\^_`{|}~\p{P}\p{S}]/u';
public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
public const REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i';
public const REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/';
Expand Down
4 changes: 3 additions & 1 deletion tests/functional/CommonMarkJSRegressionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ public static function dataProvider(): \Generator
{
$tests = SpecReader::readFile(__DIR__ . '/../../vendor/commonmark/commonmark.js/test/regression.txt');
foreach ($tests as $example) {
// We can't currently render spec example 18 exactly how the upstream library does. We'll likely need to overhaul
// We can't currently render spec examples 18 or 24 exactly how the upstream library does. We'll likely need to overhaul
// our rendering approach in order to fix that, so we'll use this temporary workaround for now.
if ($example['number'] === 18) {
$example['output'] = \str_replace('</script></li>', "</script>\n</li>", $example['output']);
} elseif ($example['number'] === 24) {
$example['output'] = \str_replace("<pre>The following line is part of HTML block.\n\n</li>", "<pre>The following line is part of HTML block.\n</li>", $example['output']);
}

yield $example;
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/Util/RegexHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ public function testHtmlComment(): void
$this->assertRegexMatches($regex, '<!---->');
$this->assertRegexMatches($regex, '<!-- -->');
$this->assertRegexMatches($regex, '<!-- HELLO WORLD -->');
$this->assertRegexMatches($regex, '<!-->');
$this->assertRegexMatches($regex, '<!--->');
$this->assertRegexDoesNotMatch($regex, '<!->');
$this->assertRegexDoesNotMatch($regex, '<!-->');
$this->assertRegexDoesNotMatch($regex, '<!--->');
$this->assertRegexDoesNotMatch($regex, '<!- ->');
}

Expand Down

0 comments on commit b81cc1d

Please sign in to comment.