Skip to content

Commit

Permalink
Merge branch '1.x'
Browse files Browse the repository at this point in the history
  • Loading branch information
vaites committed Aug 24, 2023
2 parents 18bd11a + 304f855 commit cbba9b4
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 6 deletions.
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ jobs:
- '2.5.0'
- '2.6.0'
- '2.7.0'
- '2.8.0'

name: PHP ${{ matrix.php }} - TIKA ${{ matrix.tika }}

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## v1.3.0

* Added `WebClient::setFetcherName()` to easily set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes) (thanks to @relthyg)
* Tested up to Apache Tika 1.28.5 and 2.8.0

## v1.2.5

* Tested up to Apache Tika 1.28.5 and 2.7.0
Expand Down
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ to work with the new versions of the tool.
* Support for local and remote resources
* No heavyweight library dependencies
* Compatible with Apache Tika 1.15 or greater
* Tested up to 1.28.5 and 2.7.0
* Tested up to 1.28.5 and 2.8.0
* Works on Linux, macOS, Windows and probably on FreeBSD

## Requirements
Expand Down Expand Up @@ -124,8 +124,8 @@ You can use an URL instead of a file path and the library will download the file

If you use Apache Tika >= 2.0.0, you *can* [define an HttpFetcher](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes)
and use the options `-enableUnsecureFeatures -enableFileUrl` when starting the server to make the server download remote
files when passing a URL instead of a filname to `$client->getText()`. In order to do so, you must set the name of
the HttpFetcher using `$client->setFetcherName('yourFetcherName')`.
files when passing a URL instead of a filename. In order to do so, you must set the name of the HttpFetcher using
`$client->setFetcherName('yourFetcherName')`.

### Methods

Expand Down Expand Up @@ -190,6 +190,14 @@ $client->setDownloadRemote(true);
$client->getDownloadRemote();
```


Set the [fetcher name](https://cwiki.apache.org/confluence/display/TIKA/tika-pipes):

```php
$client->setFetcherName($fetcher); // one of FileSystemFetcher, HttpFetcher, S3Fetcher, GCSFetcher, or SolrFetcher
$client->getFetcherName();
```

#### Command line client

Set/get JAR/Java paths (only CLI mode):
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"supported-versions": [
"1.15", "1.16", "1.17", "1.18", "1.19", "1.19.1", "1.20", "1.21", "1.22", "1.23", "1.24", "1.24.1",
"1.25", "1.26", "1.27", "1.28", "1.28.1", "1.28.2", "1.28.3", "1.28.4", "1.28.5",
"2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0"
"2.0.0", "2.1.0", "2.2.0", "2.2.1", "2.3.0", "2.4.0", "2.5.0", "2.6.0", "2.7.0", "2.8.0"
]
},
"scripts": {
Expand Down
24 changes: 22 additions & 2 deletions src/Clients/WebClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,28 @@ public function setRetries(int $retries): self
return $this;
}

/**
* Get the name of the fetcher to be used (for Tika >= 2.0.0 only)
*
* Returns the value currently held in the fetcherName property as-is.
*
* @link https://cwiki.apache.org/confluence/display/TIKA/tika-pipes
*
* @return string|null the stored fetcher name; presumably null when none has been set (property default not visible here)
*/
public function getFetcherName(): ?string
{
return $this->fetcherName;
}

/**
* Set the name of the fetcher to be used (for Tika >= 2.0.0 only)
*
* @link https://cwiki.apache.org/confluence/display/TIKA/tika-pipes
*
* @throws Exception if the given name is not one of the accepted fetcher names
*/
public function setFetcherName(string $fetcherName): self
{
if(!in_array($fetcherName, ['FileSystemFetcher', 'HttpFetcher', 'S3Fetcher', 'GCSFetcher', 'SolrFetcher']))
{
throw new Exception("Fetcher name $fetcherName is invalid, see https://cwiki.apache.org/confluence/display/TIKA/tika-pipes");
}

$this->fetcherName = $fetcherName;

return $this;
Expand Down Expand Up @@ -643,10 +660,13 @@ protected function getParameters(string $type, string $file = null): array

if(!empty($file) && preg_match('/^http/', $file))
{
if($this->fetcherName) {
if($this->fetcherName)
{
$headers[] = "fetcherName:$this->fetcherName";
$headers[] = "fetchKey:$file";
} else {
}
else
{
$headers[] = "fileUrl:$file";
}
}
Expand Down
16 changes: 16 additions & 0 deletions tests/ErrorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,22 @@ public function testUnsupportedChunkSize(): void
}
}

/**
 * Test invalid fetcher name
 *
 * Setting a fetcher name that is not accepted must raise an exception whose
 * message identifies the rejected name. The expectation is declared up front
 * so the test fails if no exception is thrown at all, instead of passing
 * without running a single assertion.
 */
public function testUnsupportedFetcherName(): void
{
    // Must be registered before the call that is expected to throw
    $this->expectException(Exception::class);
    $this->expectExceptionMessage('Fetcher name UnknownFetcher is invalid');

    $client = Client::make('localhost', 9998);
    $client->setFetcherName('UnknownFetcher');
}

/**
* Test wrong request type for all clients
*
Expand Down
18 changes: 18 additions & 0 deletions tests/WebTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,24 @@ public function testSetRetries(): void
$this->assertEquals(5, $client->getRetries());
}

/**
 * Check that a fetcher name set on the client is returned by its getter
 *
 * Skipped when the Tika version under test is older than 2.0.0.
 */
public function testFetcherName(): void
{
    $hasTikaPipes = version_compare(self::$version, '2.0.0', '>=');

    if(!$hasTikaPipes)
    {
        // markTestSkipped() aborts the test by throwing, nothing below runs
        $this->markTestSkipped('Apache Tika 1.x doesn\'t have tika-pipes module');
    }

    $webClient = Client::make('localhost', 9998);
    $webClient->setFetcherName('FileSystemFetcher');

    $this->assertEquals('FileSystemFetcher', $webClient->getFetcherName());
}

/**
* Test delayed check
*/
Expand Down

0 comments on commit cbba9b4

Please sign in to comment.