From 304f855028cef7fccc9f6decc7585440e435fba8 Mon Sep 17 00:00:00 2001 From: David Martinez Date: Fri, 25 Aug 2023 01:30:10 +0200 Subject: [PATCH] Added tests for WebClient::setFetcherName() --- .github/workflows/tests.yml | 1 + CHANGELOG.md | 5 +++++ README.md | 14 +++++++++++--- composer.json | 2 +- src/Clients/WebClient.php | 24 ++++++++++++++++++++++-- tests/ErrorTest.php | 16 ++++++++++++++++ tests/WebTest.php | 18 ++++++++++++++++++ 7 files changed, 74 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0ad741f..97abc58 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,6 +46,7 @@ jobs: - '2.5.0' - '2.6.0' - '2.7.0' + - '2.8.0' name: PHP ${{ matrix.php }} - TIKA ${{ matrix.tika }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5969470..a99035c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## v1.3.0 + +* Added `WebClient::setFetcherName()` to easily set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes) (thanks to @relthyg) +* Tested up to Apache Tika 1.28.5 and 2.8.0 + ## v1.2.5 * Tested up to Apache Tika 1.28.5 and 2.7.0 diff --git a/README.md b/README.md index aae3c58..06de339 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ to work with the new versions of the tool. * Support for local and remote resources * No heavyweight library dependencies * Compatible with Apache Tika 1.15 or greater - * Tested up to 1.28.5 and 2.7.0 + * Tested up to 1.28.5 and 2.8.0 * Works on Linux, macOS, Windows and probably on FreeBSD ## Requirements @@ -124,8 +124,8 @@ You can use an URL instead of a file path and the library will download the file If you use Apache Tika >= 2.0.0, you *can* [define an HttpFetcher](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes) and use the option `-enableUnsecureFeatures -enableFileUrl` when starting the server to make the server download remote -files when passing a URL instead of a filname to `$client->getText()`. In order to do so, you must set the name of -the HttpFetcher using `$client->setFetcherName('yourFetcherName')`. +files when passing a URL instead of a filename. In order to do so, you must set the name of the HttpFetcher using +`$client->setFetcherName('yourFetcherName')`. ### Methods @@ -190,6 +190,14 @@ $client->setDownloadRemote(true); $client->getDownloadRemote(); ``` + +Set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes): + +```php +$client->setFetcherName($fetcher); // one of FileSystemFetcher, HttpFetcher, S3Fetcher, GCSFetcher, or SolrFetcher +$client->getFetcherName(); +``` + #### Command line client Set/get JAR/Java paths (only CLI mode): diff --git a/composer.json b/composer.json index daba1e3..9b6f4b3 100644 --- a/composer.json +++ b/composer.json @@ -48,7 +48,7 @@ "supported-versions": [ "1.15", "1.16", "1.17", "1.18", "1.19", "1.19.1", "1.20", "1.21", "1.22", "1.23", "1.24", "1.24.1", "1.25", "1.26", "1.27", "1.28", "1.28.1", "1.28.2", "1.28.3", "1.28.4", "1.28.5", - "2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0" + "2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0", "2.8.0" ] }, "scripts": { diff --git a/src/Clients/WebClient.php b/src/Clients/WebClient.php index 6958aea..138cf77 100644 --- a/src/Clients/WebClient.php +++ b/src/Clients/WebClient.php @@ -215,11 +215,28 @@ public function setRetries(int $retries): self return $this; } + /** + * Get the name of the fetcher to be used (for Tika >= 2.0.0 only) + * + * @return string|null + */ + public function getFetcherName(): ?string + { + return $this->fetcherName; + } + /** * Set the name of the fetcher to be used (for Tika >= 2.0.0 only) + * + * @link https://cwiki.apache.org/confluence/display/TIKA/tika-pipes */ public function setFetcherName(string $fetcherName): self { + if(!in_array($fetcherName, ['FileSystemFetcher', 'HttpFetcher', 'S3Fetcher', 'GCSFetcher', 'SolrFetcher'])) + { + throw new Exception("Fetcher name $fetcherName is invalid, see https://cwiki.apache.org/confluence/display/TIKA/tika-pipes"); + } + $this->fetcherName = $fetcherName; return $this; @@ -643,10 +660,13 @@ protected function getParameters(string $type, string $file = null): array if(!empty($file) && preg_match('/^http/', $file)) { - if($this->fetcherName) { + if($this->fetcherName) + { $headers[] = "fetcherName:$this->fetcherName"; $headers[] = "fetchKey:$file"; - } else { + } + else + { $headers[] = "fileUrl:$file"; } } diff --git a/tests/ErrorTest.php b/tests/ErrorTest.php index d8b8339..7f2a1ba 100644 --- a/tests/ErrorTest.php +++ b/tests/ErrorTest.php @@ -230,6 +230,22 @@ public function testUnsupportedChunkSize(): void } } + /** + * Test invalid fetcher name + */ + public function testUnsupportedFetcherName(): void + { + try + { + $client = Client::make('localhost', 9998); + $client->setFetcherName('UnknownFetcher'); + } + catch(Exception $exception) + { + $this->assertStringContainsString('Fetcher name UnknownFetcher is invalid', $exception->getMessage()); + } + } + /** * Test wrong request type for all clients * diff --git a/tests/WebTest.php b/tests/WebTest.php index 4b935e5..d884ee9 100644 --- a/tests/WebTest.php +++ b/tests/WebTest.php @@ -156,6 +156,24 @@ public function testSetRetries(): void $this->assertEquals(5, $client->getRetries()); } + /** + * Set fetcher name test + */ + public function testFetcherName(): void + { + if(version_compare(self::$version, '2.0.0') >= 0) + { + $client = Client::make('localhost', 9998); + $client->setFetcherName('FileSystemFetcher'); + + $this->assertEquals('FileSystemFetcher', $client->getFetcherName()); + } + else + { + $this->markTestSkipped('Apache Tika 1.x doesn\'t have tika-pipes module'); + } + } + /** * Test delayed check */