Skip to content

Commit

Permalink
Merge pull request #246 from Kdecherf/singlepage-rebuild-site-config
Browse files Browse the repository at this point in the history
  • Loading branch information
j0k3r authored Jan 20, 2021
2 parents 83ff469 + 898a6be commit e115409
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 1 deletion.
14 changes: 13 additions & 1 deletion src/Graby.php
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,19 @@ private function getSinglePage($html, $url)
// check it's not what we have already!
if (false !== $singlePageUrl && $singlePageUrl !== $url) {
// it's not, so let's try to fetch it...
$response = $this->httpClient->fetch($singlePageUrl, false, $siteConfig->http_header);
$headers = $siteConfig->http_header;

$sourceUrl = parse_url($url);
$targetUrl = parse_url($singlePageUrl);
if (\is_array($sourceUrl)
&& \is_array($targetUrl)
&& \array_key_exists('host', $sourceUrl)
&& \array_key_exists('host', $targetUrl)
&& $sourceUrl['host'] !== $targetUrl['host']) {
$targetSiteConfig = $this->configBuilder->buildForHost($targetUrl['host']);
$headers = $targetSiteConfig->http_header;
}
$response = $this->httpClient->fetch($singlePageUrl, false, $headers);

if ($response['status'] < 300) {
$this->logger->info('Single page content found with url', ['url' => $singlePageUrl]);
Expand Down
29 changes: 29 additions & 0 deletions tests/GrabyTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,35 @@ public function testSinglePageMimeAction(): void
$this->assertFalse($res['native_ad']);
}

/**
 * A page on singlepage2.com links to a "printed view" hosted on singlepage5.com.
 *
 * The test queues two canned HTTP responses (the original page, then the
 * linked page) and checks that the fetched result carries the content and
 * the URL of the second one.
 *
 * NOTE(review): presumably the singlepage2.com fixture declares the
 * single-page link rule that makes Graby follow the "printed view" anchor;
 * that fixture is not visible here — confirm.
 */
public function testSinglePageReloadSiteConfig(): void
{
    // Both hosts resolve to the same mocked A record.
    $aRecord = [['type' => 'A', 'ip' => self::AN_IPV4]];
    DnsMock::withMockedHosts([
        'singlepage2.com' => $aRecord,
        'singlepage5.com' => $aRecord,
    ]);

    // Bodies are served in this order: first the source page, then the page its link points to.
    $bodies = [
        '<html><h1 class="print-title">my title</h1><div class="print-submitted">my singlepage2</div><ul><li class="service-links-print"><a href="http://singlepage5.com/hello" class="service-links-print">printed view</a></li></ul></html>',
        '<html><h1 class="print-title">my title</h1><div class="main-article">my singlepage5</div></html>',
    ];

    $client = new HttpMockClient();
    foreach ($bodies as $body) {
        $client->addResponse(new Response(200, ['Content-Type' => 'text/html'], $body));
    }

    $config = [
        'debug' => true,
        'xss_filter' => false,
        'extractor' => [
            'config_builder' => [
                'site_config' => [__DIR__ . '/fixtures/site_config'],
            ],
        ],
    ];
    $graby = new Graby($config, $client);

    $result = $graby->fetchContent('http://singlepage2.com/hello');

    $this->assertStringContainsString('my singlepage5', $result['html']);
    $this->assertSame('http://singlepage5.com/hello', $result['url']);
}

/**
* @group dns-sensitive
*/
Expand Down
3 changes: 3 additions & 0 deletions tests/fixtures/site_config/singlepage5.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
title: //h1[@class='print-title']
body: //div[@class='main-article']
prune: no

0 comments on commit e115409

Please sign in to comment.