Skip to content

Commit

Permalink
Merge pull request #231 from j0k3r/feature/lowest-deps
Browse files Browse the repository at this point in the history
Define lowest working deps
  • Loading branch information
j0k3r authored Jun 2, 2020
2 parents 6acde57 + b52f9d2 commit 12e9679
Show file tree
Hide file tree
Showing 14 changed files with 507 additions and 393 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ log/graby.log
log/html.log
composer.lock
.php_cs.cache
.phpunit.result.cache
17 changes: 9 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,37 @@ php:
- 7.4
- nightly

# run builds against nightly but allow them to fail
jobs:
fast_finish: true
allow_failures:
- php: nightly
# only one build sends the coverage report, which speeds up the other builds
include:
- php: 7.3
env: PHPUNIT_FLAGS="--coverage-clover build/logs/clover.xml" CS_FIXER=run
- php: 7.2
env: GUZZLE5=run
- php: 7.2
env: CURL=run
- php: 7.2
env: COMPOSER_FLAGS="--prefer-lowest"

# cache vendor dirs
cache:
directories:
- vendor
- $HOME/.composer/cache
- $HOME/.composer/cache/files

before_install:
- if [ -n "$GH_TOKEN" ]; then composer config github-oauth.github.com ${GH_TOKEN}; fi;

# because php-coveralls requires Guzzle 6
- if [ "$GUZZLE5" = "run" ]; then composer remove php-coveralls/php-coveralls php-http/guzzle6-adapter --dev -n ; fi;
- if [ "$GUZZLE5" = "run" ]; then composer remove guzzlehttp/guzzle php-http/guzzle6-adapter --dev -n ; fi;
- if [ "$GUZZLE5" = "run" ]; then composer require php-http/guzzle5-adapter '^2.0' --dev -n ; fi;

- if [ "$CURL" = "run" ]; then composer remove php-http/guzzle6-adapter --dev -n ; fi;
- if [ "$CURL" = "run" ]; then composer require php-http/curl-client '2.x-dev' --dev -n ; fi;
- if [ "$CURL" = "run" ]; then composer require zendframework/zend-diactoros --dev -n ; fi;

install:
- composer up --no-interaction --prefer-dist -o
- composer update --prefer-dist --no-progress --no-suggest -o $COMPOSER_FLAGS

before_script:
- echo 'date.timezone = "Europe/Paris"' >> ~/.phpenv/versions/$(phpenv version-name)/etc/conf.d/travis.ini
Expand All @@ -53,5 +51,8 @@ script:
after_success:
- |
if [[ "$PHPUNIT_FLAGS" != "" ]]; then
travis_retry php vendor/bin/php-coveralls -v -x build/logs/clover.xml
wget https://github.com/php-coveralls/php-coveralls/releases/download/v2.2.0/php-coveralls.phar
chmod +x php-coveralls.phar
php php-coveralls.phar -v -x build/logs/clover.xml
fi
40 changes: 21 additions & 19 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,29 @@
"php": ">=7.1",
"ext-curl": "*",
"ext-tidy": "*",
"j0k3r/php-readability": "^1.1",
"j0k3r/graby-site-config": "^1.0",
"simplepie/simplepie": "^1.3.1",
"fossar/htmlawed": "^1.2.4",
"symfony/options-resolver": "~2.6|~3.0|~4.0|~5.0",
"monolog/monolog": "^1.13.1|^2.0",
"smalot/pdfparser": "~0.11",
"true/punycode": "~2.1",
"psr/http-message": "^1.0",
"php-http/httplug": "^2.0",
"php-http/discovery": "^1.0",
"fossar/htmlawed": "^1.2.7",
"guzzlehttp/psr7": "^1.5",
"j0k3r/graby-site-config": "^1.0.100",
"j0k3r/httplug-ssrf-plugin": "^2.0",
"j0k3r/php-readability": "^1.2.3",
"monolog/monolog": "^1.18.0|^2.0",
"php-http/client-common": "^2.0",
"php-http/discovery": "^1.5",
"php-http/httplug": "^2.0",
"php-http/message": "^1.7",
"j0k3r/httplug-ssrf-plugin": "^2.0",
"guzzlehttp/psr7": "^1.5"
"simplepie/simplepie": "^1.5",
"smalot/pdfparser": "^0.15.1",
"symfony/options-resolver": "^3.4|^4.0|^5.0",
"true/punycode": "^2.1"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "~2.14",
"symfony/phpunit-bridge": "~2.6|~3.0|~4.0|^5.0",
"php-http/mock-client": "^1.2",
"friendsofphp/php-cs-fixer": "^2.16",
"guzzlehttp/guzzle": "^6.3.0",
"php-http/guzzle6-adapter": "^2.0",
"php-coveralls/php-coveralls": "^2.0",
"phpstan/phpstan": "^0.11",
"phpstan/phpstan-phpunit": "^0.11"
"php-http/mock-client": "^1.3",
"phpstan/phpstan": "^0.12",
"phpstan/phpstan-phpunit": "^0.12",
"symfony/phpunit-bridge": "^5.1"
},
"extra": {
"branch-alias": {
Expand All @@ -57,5 +56,8 @@
"psr-4": {
"Tests\\Graby\\": "tests/"
}
},
"config": {
"sort-packages": true
}
}
14 changes: 7 additions & 7 deletions phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@ includes:
parameters:
# https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
autoload_files:
- vendor/bin/.phpunit/phpunit-7.4-0/vendor/autoload.php
- vendor/bin/.phpunit/phpunit-8.3-0/vendor/autoload.php

ignoreErrors:
# because we check for some HTTP client to exist or not (Guzzle 5/6 & cURL)
-
message: '#Instantiated class (.*) not found.#'
path: %currentWorkingDirectory%/tests/Extractor/HttpClientTest.php
-
message: '#Http\\Adapter\\Guzzle5\\Client\\|Http\\Adapter\\Guzzle6\\Client\\|Http\\Client\\Curl\\Client given#'
path: %currentWorkingDirectory%/tests/Extractor/HttpClientTest.php
# because DOMXpath->query can return false
# we don't want to break BC by defining typehints everywhere
# TODO: remove when jumping to 3.0
-
message: '#between false and DOMNodeList will always evaluate to false#'
path: %currentWorkingDirectory%/src/Extractor/ContentExtractor.php
message: '#typehint specified.#'
path: %currentWorkingDirectory%/src/

inferPrivatePropertyTypeFromConstructor: true
checkMissingIterableValueType: false
checkGenericClassInNonGenericObjectType: false
5 changes: 0 additions & 5 deletions phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@
bootstrap="vendor/autoload.php"
>

<php>
<env name="SYMFONY_DEPRECATIONS_HELPER" value="weak" />
<env name="SYMFONY_PHPUNIT_VERSION" value="7.4" />
</php>

<testsuites>
<testsuite name="Graby Test Suite">
<directory>./tests</directory>
Expand Down
61 changes: 42 additions & 19 deletions src/Extractor/ContentExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public function process($html, $url, SiteConfig $siteConfig = null, $smartTidy =
$this->readability = $this->getReadability($html, $url, $parser, $this->siteConfig->tidy() && $smartTidy);
$tidied = $this->readability->tidied;

$this->logger->info('Body size after Readability: {length}', ['length' => \strlen($this->readability->dom->saveXML())]);
$this->logger->info('Body size after Readability: {length}', ['length' => \strlen((string) $this->readability->dom->saveXML())]);
$this->logger->debug('Body after Readability', ['dom_saveXML' => $this->readability->dom->saveXML()]);

// we use xpath to find elements in the given HTML document
Expand Down Expand Up @@ -369,7 +369,7 @@ public function process($html, $url, SiteConfig $siteConfig = null, $smartTidy =

// try to get body
foreach ($this->siteConfig->body as $pattern) {
$this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen($this->readability->dom->saveXML())]);
$this->logger->info('Trying {pattern} for body (content length: {content_length})', ['pattern' => $pattern, 'content_length' => \strlen((string) $this->readability->dom->saveXML())]);

$res = $this->extractBody(
true,
Expand Down Expand Up @@ -409,7 +409,7 @@ public function process($html, $url, SiteConfig $siteConfig = null, $smartTidy =
// check for hentry
$elems = $this->xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);

if ($this->hasElements($elems)) {
if (false !== $elems && $this->hasElements($elems)) {
$this->logger->info('hNews: found hentry');
$hentry = $elems->item(0);

Expand Down Expand Up @@ -728,21 +728,28 @@ protected function addAuthor($authorDirty)
/**
* Check if given node list exists and has length more than 0.
*
* @param \DOMNodeList|false $elems Not force typed because it can also be false
*
* @return bool
*/
private function hasElements(\DOMNodeList $elems)
private function hasElements($elems = false)
{
if (false === $elems) {
return false;
}

return $elems->length > 0;
}

/**
* Remove elements.
*
* @param string $logMessage
* @param \DOMNodeList|false $elems Not force typed because it can also be false
* @param string $logMessage
*/
private function removeElements(\DOMNodeList $elems, $logMessage = null)
private function removeElements($elems = false, $logMessage = null)
{
if (false === $this->hasElements($elems)) {
if (false === $elems || false === $this->hasElements($elems)) {
return;
}

Expand Down Expand Up @@ -1185,22 +1192,28 @@ private function extractOpenGraph(\DOMXPath $xpath)
$metas = $xpath->query('//*/meta[starts-with(@property, \'og:\')]');
$ogMetas = [];
foreach ($metas as $meta) {
$property = str_replace(':', '_', (string) $meta->getAttribute('property'));
if (false !== $metas) {
foreach ($metas as $meta) {
if (!$meta instanceof \DOMElement) {
continue;
}
$property = str_replace(':', '_', (string) $meta->getAttribute('property'));

if (\in_array($property, ['og_image', 'og_image_url', 'og_image_secure_url'], true)) {
// avoid image data:uri to avoid sending too much data
// also, take the first og:image which is usually the best one
if (0 === stripos($meta->getAttribute('content'), 'data:image') || !empty($ogMetas[$property])) {
continue;
}

$ogMetas[$property] = $meta->getAttribute('content');

if (\in_array($property, ['og_image', 'og_image_url', 'og_image_secure_url'], true)) {
// avoid image data:uri to avoid sending too much data
// also, take the first og:image which is usually the best one
if (0 === stripos($meta->getAttribute('content'), 'data:image') || !empty($ogMetas[$property])) {
continue;
}

$ogMetas[$property] = $meta->getAttribute('content');

continue;
}

$ogMetas[$property] = $meta->getAttribute('content');
}

$this->logger->info('Opengraph "og:" data: {ogData}', ['ogData' => $ogMetas]);
Expand Down Expand Up @@ -1230,8 +1243,14 @@ private function extractOpenGraph(\DOMXPath $xpath)
$metas = $xpath->query('//*/meta[starts-with(@property, \'article:\')]');
$articleMetas = [];
foreach ($metas as $meta) {
$articleMetas[str_replace(':', '_', (string) $meta->getAttribute('property'))] = $meta->getAttribute('content');
if (false !== $metas) {
foreach ($metas as $meta) {
if (!$meta instanceof \DOMElement) {
continue;
}
$articleMetas[str_replace(':', '_', (string) $meta->getAttribute('property'))] = $meta->getAttribute('content');
}
}

$this->logger->info('Opengraph "article:" data: {ogData}', ['ogData' => $articleMetas]);
Expand Down Expand Up @@ -1274,6 +1293,10 @@ private function extractJsonLdInformation(\DOMXPath $xpath)
{
$scripts = $xpath->query('//*/script[@type="application/ld+json"]');

if (false === $scripts) {
return null;
}

$ignoreNames = [];
$candidateNames = [];

Expand Down
Loading

0 comments on commit 12e9679

Please sign in to comment.