Skip to content

Commit

Permalink
Fix implied URL parsing
Browse files Browse the repository at this point in the history
Fixes microformats#183. Adds a test based on that issue. Fixes an old test that
accidentally tests for the child microformats case.
  • Loading branch information
Zegnat committed Aug 7, 2018
1 parent 35fee8b commit bdff0e7
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 28 deletions.
51 changes: 24 additions & 27 deletions Mf2/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -1083,35 +1083,32 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =

}

// Check for u-url
if (!array_key_exists('url', $return) && !$is_backcompat) {
$url = null;
// Look for a @href or area @href on the element itself
if ($e->tagName == 'a' or $e->tagName == 'area') {
$url = $e->getAttribute('href');
}

// Look for nested a @href
foreach ($this->xpath->query('./a[count(preceding-sibling::a)+count(following-sibling::a)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
}
}

// Look for nested area @href
foreach ($this->xpath->query('./area[count(preceding-sibling::area)+count(following-sibling::area)=0]', $e) as $em) {
$emNames = mfNamesFromElement($em, 'h-');
if (empty($emNames)) {
$url = $em->getAttribute('href');
break;
// Do we need to imply a url property?
// if no explicit "url" property, and no other explicit u-* properties, and no nested microformats
if (!array_key_exists('url', $return) && !in_array('u-', $prefixes) && !$has_nested_mf && !$is_backcompat) {
// a.h-x[href] or area.h-x[href]
if (($e->tagName === 'a' || $e->tagName === 'area') && $e->hasAttribute('href')) {
$return['url'][] = $this->resolveUrl($e->getAttribute('href'));
} else {
$xpaths = array(
// .h-x>a[href]:only-of-type:not[.h-*]
'./a[not(contains(concat(" ", @class), " h-")) and count(../a) = 1 and @href]',
// .h-x>area[href]:only-of-type:not[.h-*]
'./area[not(contains(concat(" ", @class), " h-")) and count(../area) = 1 and @href]',
// .h-x>:only-child:not[.h-*]>a[href]:only-of-type:not[.h-*]
'./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(a) = 1]/a[not(contains(concat(" ", @class), " h-")) and @href]',
// .h-x>:only-child:not[.h-*]>area[href]:only-of-type:not[.h-*]
'./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(area) = 1]/area[not(contains(concat(" ", @class), " h-")) and @href]'
);
foreach ($xpaths as $xpath) {
$url = $this->xpath->query($xpath, $e);
// var_dump($xpath, $url->item(0)->getAttribute('href'));
if ($url->length === 1) {
$return['url'][] = $this->resolveUrl($url->item(0)->getAttribute('href'));
break;
}
}
}

if (!is_null($url)) {
$return['url'][] = $this->resolveUrl($url);
}
}

// Make sure things are unique and in alphabetical order
Expand Down
2 changes: 1 addition & 1 deletion tests/Mf2/ClassicMicroformatsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ public function testMixedMf2andMf1Case3() {
$parser = new Parser($input);
$result = $parser->parse();

$this->assertCount(3, $result['items'][0]['properties']);
$this->assertCount(2, $result['items'][0]['properties']);
$this->assertArrayNotHasKey('street-address', $result['items'][0]['properties']);
$this->assertArrayNotHasKey('locality', $result['items'][0]['properties']);
$this->assertArrayNotHasKey('country-name', $result['items'][0]['properties']);
Expand Down
13 changes: 13 additions & 0 deletions tests/Mf2/ParseImpliedTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -331,5 +331,18 @@ public function testBackcompatNoImpliedUrl() {
$this->assertArrayHasKey('content', $result['items'][0]['properties']);
}


/**
 * Don't imply u-url if there are other u-* properties.
 *
 * The fixture is an h-entry whose p-name heading wraps a link, followed by
 * an e-content block and a sibling link explicitly marked u-syndication.
 * The first parsed item must not carry a 'url' property.
 *
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_for_implied_properties
 * @see https://github.com/microformats/php-mf2/issues/183
 */
public function testNoImpliedUrl() {
    $html = '<div class="h-entry"> <h1 class="p-name"><a href="https://example.com/this-post">Title</a></h1> <div class="e-content"> <p> blah blah blah </p> </div> <a href="https://example.org/syndicate" class="u-syndication"></a> </div>';
    $parsed = Mf2\parse($html);

    // Only the first (and only expected) top-level item is inspected.
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayNotHasKey('url', $properties);
}

}

0 comments on commit bdff0e7

Please sign in to comment.