Skip to content

Commit

Permalink
Fix a few problems reported by PHPStan (#192)
Browse files Browse the repository at this point in the history
* Fix a few problems reported by PHPStan

* Fix UsesContent

* Introduce PHPScraperConfig array shape

* Add array type of __call
  • Loading branch information
szepeviktor authored Aug 27, 2023
1 parent 0ae9f24 commit 7e8cb3b
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 230 deletions.
210 changes: 0 additions & 210 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
@@ -1,215 +1,5 @@
parameters:
ignoreErrors:
-
message: "#^Dead catch \\- Exception is never thrown in the try block\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:castType\\(\\) has no return type specified\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:cleanOutlineWithParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:cleanParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:contentKeywords\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:contentKeywordsWithScores\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecode\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeRaw\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeWithHeader\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:csvDecodeWithHeaderRaw\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:externalLinks\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h1\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h2\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h3\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h4\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h5\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:h6\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:headers\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:headings\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:images\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:imagesWithDetails\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:internalLinks\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:keywords\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:links\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:linksWithDetails\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:lists\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:metaTags\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:outlineWithParagraphs\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseCsv\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseCsvWithHeader\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseJson\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:parseXml\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:rssRaw\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:searchIndexRaw\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:sitemapRaw\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:viewport\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\Core\\:\\:xmlDecode\\(\\) return type has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^PHPDoc tag @var for variable \\$array has no value type specified in iterable type array\\.$#"
count: 1
path: src/Core.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:__call\\(\\) has parameter \\$arguments with no value type specified in iterable type array\\.$#"
count: 1
path: src/PHPScraper.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:__construct\\(\\) has parameter \\$config with no value type specified in iterable type array\\.$#"
count: 1
path: src/PHPScraper.php

-
message: "#^Method Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:setConfig\\(\\) has parameter \\$config with no value type specified in iterable type array\\.$#"
count: 1
path: src/PHPScraper.php

-
message: "#^Property Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:\\$config type has no value type specified in iterable type array\\.$#"
count: 1
path: src/PHPScraper.php

-
message: "#^Access to an undefined property Spekulatius\\\\PHPScraper\\\\PHPScraper\\:\\:\\$baseHref\\.$#"
count: 2
Expand Down
9 changes: 5 additions & 4 deletions phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ includes:
parameters:
level: 9
paths:
- src
- tests
excludePaths:
- vendor
- src/
- tests/
tmpDir: ./.tmp/phpstan/
ignoreErrors:
# TODO Add those return types.
- '#return type has no value type specified in iterable type array\.$#'
20 changes: 13 additions & 7 deletions src/PHPScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,29 @@
use Symfony\Component\BrowserKit\HttpBrowser;
use Symfony\Component\HttpClient\HttpClient as SymfonyHttpClient;

/**
* @phpstan-type PHPScraperConfig array{'follow_redirects'?: bool, 'follow_meta_refresh'?: bool, 'max_redirects'?: int, 'agent'?: string, 'proxy'?: string|null, 'timeout'?: int, 'disable_ssl'?: bool}
*/
class PHPScraper
{
/**
* Holds the config for the clients.
*
* @var array
* @var PHPScraperConfig
*/
protected $config = [];

/**
* Holds the Core class. It handles the actual scraping.
*
* @var \Spekulatius\PHPScraper\Core;
* @var \Spekulatius\PHPScraper\Core
*/
protected $core = null;

public function __construct(?array $config = [])
/**
* @param PHPScraperConfig $config
*/
public function __construct(array $config = [])
{
// Prepare the core. It delegates all further processing.
$this->core = new Core;
Expand All @@ -39,9 +45,9 @@ public function __construct(?array $config = [])
/**
* Sets the config, generates the required Clients and updates the core with the new clients.
*
* @param ?array $config = []
* @param PHPScraperConfig $config
*/
public function setConfig(?array $config = []): self
public function setConfig(array $config = []): self
{
// Define the default values
$defaults = [
Expand Down Expand Up @@ -86,7 +92,7 @@ public function setConfig(?array $config = []): self
];

// Add the defaults in
$this->config = array_merge($defaults, $config ?? []);
$this->config = array_merge($defaults, $config);

// Symfony HttpClient
$httpClient = SymfonyHttpClient::create([
Expand Down Expand Up @@ -126,7 +132,7 @@ public function __get(string $name)
* Catches the method calls and tries to satisfy them.
*
* @param string $name
* @param array $arguments
* @param array<mixed> $arguments
* @return mixed
*/
public function __call(string $name, array $arguments = [])
Expand Down
19 changes: 12 additions & 7 deletions src/UsesContent.php
Original file line number Diff line number Diff line change
Expand Up @@ -303,17 +303,22 @@ public function outlineWithParagraphs(): array
*/
public function cleanOutlineWithParagraphs(): array
{
$result = $this->filterExtractAttributes('//h1|//h2|//h3|//h4|//h5|//h6|//p', ['_name', '_text']);
$elementsNameAndText = $this->filterExtractAttributes('//h1|//h2|//h3|//h4|//h5|//h6|//p', ['_name', '_text']);

/** @var array $array */
foreach ($result as $index => $array) {
if ($array[1] !== '') {
$result[$index] = array_combine(['tag', 'content'], $array);
$result[$index]['content'] = trim($result[$index]['content']);
/** @var array<string> $nameAndText */
foreach ($elementsNameAndText as $index => $nameAndText) {
// Element has no text.
if ($nameAndText[1] === '') {
continue;
}

$elementsNameAndText[$index] = [
'tag' => $nameAndText[0],
'content' => trim($nameAndText[1]),
];
}

return $result;
return $elementsNameAndText;
}

/**
Expand Down
8 changes: 6 additions & 2 deletions src/UsesFileParsers.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ public function csvDecodeWithHeader(
return $csv;
}

// Helper method to cast types
/**
* Helper method to cast types
*
* @return int|float|string
*/
public function castType(string $entry)
{
// Looks like an int?
Expand Down Expand Up @@ -279,7 +283,7 @@ public function parseJson(?string $jsonStringOrUrl = null): array
if ($jsonStringOrUrl !== null) {
// Simple: Try to parse what we have been given
try {
$result = json_decode($jsonStringOrUrl, true);
$result = json_decode($jsonStringOrUrl, true, 512, JSON_THROW_ON_ERROR);
} catch (\Exception $e) {
// We don't do anything if it fails - likely we have an URL. Let's continue below.
}
Expand Down

0 comments on commit 7e8cb3b

Please sign in to comment.