diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 1d16099..0e46631 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -1,215 +1,5 @@ parameters: ignoreErrors: - - - message: "#^Dead catch \\- Exception is never thrown in the try block\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:castType\\(\\) has no return type specified\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:cleanOutlineWithParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:cleanParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:contentKeywords\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:contentKeywordsWithScores\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecode\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeRaw\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeWithHeader\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeWithHeaderRaw\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:externalLinks\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h1\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h2\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h3\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h4\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h5\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h6\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:headers\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:headings\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:images\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:imagesWithDetails\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:internalLinks\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:keywords\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:links\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:linksWithDetails\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:lists\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:metaTags\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:outlineWithParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseCsv\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseCsvWithHeader\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseJson\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseXml\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:rssRaw\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:searchIndexRaw\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:sitemapRaw\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:viewport\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:xmlDecode\\(\\) return type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^PHPDoc tag @var for variable \\$array has no value type specified in iterable type array\\.$#" - count: 1 - path: src/Core.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:__call\\(\\) has parameter \\$arguments with no value type specified in iterable type array\\.$#" - count: 1 - path: src/PHPScraper.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:__construct\\(\\) has parameter \\$config with no value type specified in iterable type array\\.$#" - count: 1 - path: src/PHPScraper.php - - - - message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:setConfig\\(\\) has parameter \\$config with no value type specified in iterable type array\\.$#" - count: 1 - path: src/PHPScraper.php - - - - message: "#^Property Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:\\$config type has no value type specified in iterable type array\\.$#" - count: 1 - path: src/PHPScraper.php - - message: "#^Access to an undefined property Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:\\$baseHref\\.$#" count: 2 diff --git a/phpstan.neon b/phpstan.neon index 1f03ea9..c606fb6 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -4,8 +4,9 @@ includes: parameters: level: 9 paths: - - src - - tests - excludePaths: - - vendor + - src/ + - tests/ tmpDir: ./.tmp/phpstan/ + ignoreErrors: + # TODO Add those return types. + - '#return type has no value type specified in iterable type array\.$#' diff --git a/src/PHPScraper.php b/src/PHPScraper.php index 998915d..6a3ff58 100644 --- a/src/PHPScraper.php +++ b/src/PHPScraper.php @@ -11,23 +11,29 @@ use Symfony\Component\BrowserKit\HttpBrowser; use Symfony\Component\HttpClient\HttpClient as SymfonyHttpClient; +/** + * @phpstan-type PHPScraperConfig array{'follow_redirects'?: bool, 'follow_meta_refresh'?: bool, 'max_redirects'?: int, 'agent'?: string, 'proxy'?: string|null, 'timeout'?: int, 'disable_ssl'?: bool} + */ class PHPScraper { /** * Holds the config for the clients. * - * @var array + * @var PHPScraperConfig */ protected $config = []; /** * Holds the Core class. It handles the actual scraping. * - * @var \Spekulatius\PHPScraper\Core; + * @var \Spekulatius\PHPScraper\Core */ protected $core = null; - public function __construct(?array $config = []) + /** + * @param PHPScraperConfig $config + */ + public function __construct(array $config = []) { // Prepare the core. It delegates all further processing. $this->core = new Core; @@ -39,9 +45,9 @@ public function __construct(?array $config = []) /** * Sets the config, generates the required Clients and updates the core with the new clients. * - * @param ?array $config = [] + * @param PHPScraperConfig $config */ - public function setConfig(?array $config = []): self + public function setConfig(array $config = []): self { // Define the default values $defaults = [ @@ -86,7 +92,7 @@ public function setConfig(?array $config = []): self ]; // Add the defaults in - $this->config = array_merge($defaults, $config ?? []); + $this->config = array_merge($defaults, $config); // Symfony HttpClient $httpClient = SymfonyHttpClient::create([ @@ -126,7 +132,7 @@ public function __get(string $name) * Catches the method calls and tries to satisfy them. * * @param string $name - * @param array $arguments + * @param array $arguments * @return mixed */ public function __call(string $name, array $arguments = []) diff --git a/src/UsesContent.php b/src/UsesContent.php index 142cc63..cf59d08 100644 --- a/src/UsesContent.php +++ b/src/UsesContent.php @@ -303,17 +303,22 @@ public function outlineWithParagraphs(): array */ public function cleanOutlineWithParagraphs(): array { - $result = $this->filterExtractAttributes('//h1|//h2|//h3|//h4|//h5|//h6|//p', ['_name', '_text']); + $elementsNameAndText = $this->filterExtractAttributes('//h1|//h2|//h3|//h4|//h5|//h6|//p', ['_name', '_text']); - /** @var array $array */ - foreach ($result as $index => $array) { - if ($array[1] !== '') { - $result[$index] = array_combine(['tag', 'content'], $array); - $result[$index]['content'] = trim($result[$index]['content']); + /** @var array $nameAndText */ + foreach ($elementsNameAndText as $index => $nameAndText) { + // Element has no text. + if ($nameAndText[1] === '') { + continue; } + + $elementsNameAndText[$index] = [ + 'tag' => $nameAndText[0], + 'content' => trim($nameAndText[1]), + ]; } - return $result; + return $elementsNameAndText; } /** diff --git a/src/UsesFileParsers.php b/src/UsesFileParsers.php index 8ce7aec..9fcb1e7 100644 --- a/src/UsesFileParsers.php +++ b/src/UsesFileParsers.php @@ -133,7 +133,11 @@ public function csvDecodeWithHeader( return $csv; } - // Helper method to cast types + /** + * Helper method to cast types + * + * @return int|float|string + */ public function castType(string $entry) { // Looks like an int? @@ -279,7 +283,7 @@ public function parseJson(?string $jsonStringOrUrl = null): array if ($jsonStringOrUrl !== null) { // Simple: Try to parse what we have been given try { - $result = json_decode($jsonStringOrUrl, true); + $result = json_decode($jsonStringOrUrl, true, 512, JSON_THROW_ON_ERROR); } catch (\Exception $e) { // We don't do anything if it fails - likely we have an URL. Let's continue below. }