diff --git a/CHANGELOG.md b/CHANGELOG.md index b3b25f906f..a73ad86b3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file based on the - The disable_coord parameter of the bool and common_terms queries has been removed. If provided, it will be ignored and issue a deprecation warning. [#1369](https://github.com/ruflin/Elastica/pull/1369) - [Unfiltered nested source](https://github.com/elastic/elasticsearch/pull/26102) should keep its full path [#1366](https://github.com/ruflin/Elastica/pull/1366) - [Analyze Explain](https://www.elastic.co/guide/en/elasticsearch/reference/6.0/_explain_analyze.html) no more support [request parameters](https://www.elastic.co/guide/en/elasticsearch/reference/5.5/indices-analyze.html), use request body instead. [#1370](https://github.com/ruflin/Elastica/pull/1370) +- [Mapper Attachment plugin has been removed](https://github.com/elastic/elasticsearch/pull/20416). Use the Ingest Attachment plugin and an attachment processor in an ingest pipeline to ingest new documents instead. [#1375](https://github.com/ruflin/Elastica/pull/1375) ### Bugfixes - Enforce [Content-Type requirement on the layer Rest](https://github.com/elastic/elasticsearch/pull/23146), a [PR on Elastica #1301](https://github.com/ruflin/Elastica/issues/1301) solved it (it has been implemented only in the HTTP Transport), but it was not implemented in the Guzzle Transport. 
[#1349](https://github.com/ruflin/Elastica/pull/1349) diff --git a/test/Elastica/IndexTest.php b/test/Elastica/IndexTest.php index 776276a789..99fdafe487 100644 --- a/test/Elastica/IndexTest.php +++ b/test/Elastica/IndexTest.php @@ -118,172 +118,6 @@ public function testParent() $this->assertEquals(['title' => 'Foo bar'], $resultSet->current()->getData()); } - /** - * @group functional - */ - public function testAddPdfFile() - { - $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]'); - $indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']]; - - $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]]; - - $index = $this->_createIndex(); - $type = new Type($index, 'test'); - - $index->create($indexParams, true); - $type->setMapping($indexMapping); - - $doc1 = new Document(1); - $doc1->addFile('file', BASE_PATH.'/data/test.pdf', 'application/pdf'); - $doc1->set('text', 'basel world'); - $type->addDocument($doc1); - - $doc2 = new Document(2); - $doc2->set('text', 'running in basel'); - $type->addDocument($doc2); - - $index->forcemerge(); - - $resultSet = $type->search('xodoa'); - $this->assertEquals(1, $resultSet->count()); - - $resultSet = $type->search('basel'); - $this->assertEquals(2, $resultSet->count()); - - // Author is ruflin - $resultSet = $type->search('ruflin'); - $this->assertEquals(1, $resultSet->count()); - - // String does not exist in file - $resultSet = $type->search('guschti'); - $this->assertEquals(0, $resultSet->count()); - } - - /** - * @group functional - */ - public function testAddPdfFileContent() - { - $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]'); - $indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']]; - - $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]]; - - $index = $this->_createIndex(); - 
$type = new Type($index, 'test'); - - $index->create($indexParams, true); - $type->setMapping($indexMapping); - - $doc1 = new Document(1); - $doc1->addFileContent('file', file_get_contents(BASE_PATH.'/data/test.pdf')); - $doc1->set('text', 'basel world'); - $type->addDocument($doc1); - - $doc2 = new Document(2); - $doc2->set('text', 'running in basel'); - $type->addDocument($doc2); - - $index->forcemerge(); - - $resultSet = $type->search('xodoa'); - $this->assertEquals(1, $resultSet->count()); - - $resultSet = $type->search('basel'); - $this->assertEquals(2, $resultSet->count()); - - // Author is ruflin - $resultSet = $type->search('ruflin'); - $this->assertEquals(1, $resultSet->count()); - - // String does not exist in file - $resultSet = $type->search('guschti'); - $this->assertEquals(0, $resultSet->count()); - } - - /** - * @group functional - */ - public function testAddWordxFile() - { - $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]'); - $indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text']]; - - $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]]; - - $index = $this->_createIndex(); - $type = new Type($index, 'content'); - - $index->create($indexParams, true); - $type->setMapping($indexMapping); - - $doc1 = new Document(1); - $doc1->addFile('file', BASE_PATH.'/data/test.docx'); - $doc1->set('text', 'basel world'); - $type->addDocument($doc1); - - $index->forcemerge(); - $index->refresh(); - - $doc2 = new Document(2); - $doc2->set('text', 'running in basel'); - $type->addDocument($doc2); - - $index->forcemerge(); - $index->refresh(); - - $resultSet = $type->search('basel'); - $this->assertEquals(2, $resultSet->count()); - - $resultSet = $type->search('ruflin'); - $this->assertEquals(0, $resultSet->count()); - - $resultSet = $type->search('Xodoa'); - $this->assertEquals(1, $resultSet->count()); - } - - /** - * @group functional - */ - 
public function testExcludeFileSource() - { - $this->markTestSkipped('ES6 update: use ingest attachment : No handler for type [attachment] declared on field [file]'); - $indexMapping = ['file' => ['type' => 'attachment'], 'text' => ['type' => 'text', 'store' => true], - 'title' => ['type' => 'text', 'store' => true], ]; - - $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]]; - - $index = $this->_createIndex(); - $type = new Type($index, 'content'); - - $mapping = Mapping::create($indexMapping); - $mapping->setSource(['excludes' => ['file']]); - - $mapping->setType($type); - - $index->create($indexParams, true); - $type->setMapping($mapping); - - $docId = 1; - $text = 'Basel World'; - $title = 'No Title'; - - $doc1 = new Document($docId); - $doc1->addFile('file', BASE_PATH.'/data/test.docx'); - $doc1->set('text', $text); - $doc1->set('title', $title); - $type->addDocument($doc1); - - // Optimization necessary, as otherwise source still in realtime get - $index->forcemerge(); - - $data = $type->getDocument($docId)->getData(); - $this->assertEquals($data['title'], $title); - $this->assertEquals($data['text'], $text); - $this->assertFalse(isset($data['file'])); - } - /** * @group functional * @expectedException \Elastica\Exception\ResponseException diff --git a/test/Elastica/Processor/AttachmentTest.php b/test/Elastica/Processor/AttachmentTest.php index 001001cf9a..de57bf7968 100644 --- a/test/Elastica/Processor/AttachmentTest.php +++ b/test/Elastica/Processor/AttachmentTest.php @@ -1,8 +1,11 @@ assertEquals($expected, $processor->toArray()); } + + /** + * @group functional + */ + public function testAttachmentAddPdf() + { + $attachment = new Attachment('data'); + $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment'); + $pipeline->addProcessor($attachment); + $pipeline->create(); + + $index = $this->_createIndex(); + $type = $index->getType('bulk_test'); + + $bulk = new Bulk($index->getClient()); + 
$bulk->setIndex($index); + $bulk->setType($type); + + $doc1 = new Document(null); + $doc1->addFile('data', BASE_PATH.'/data/test.pdf'); + + $doc2 = new Document(2, ['data' => '', 'text' => 'test running in basel']); + + $bulk->addDocuments([ + $doc1, $doc2 + ]); + $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment'); + + $bulk->send(); + $index->refresh(); + + $resultSet = $type->search('xodoa'); + $this->assertEquals(1, $resultSet->count()); + + $resultSet = $type->search('test'); + $this->assertEquals(2, $resultSet->count()); + + // Author is ruflin + $resultSet = $type->search('ruflin'); + $this->assertEquals(1, $resultSet->count()); + + // String does not exist in file + $resultSet = $type->search('guschti'); + $this->assertEquals(0, $resultSet->count()); + } + + /** + * @group functional + */ + public function testAttachmentAddPdfFileContent() + { + $attachment = new Attachment('data'); + $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment'); + $pipeline->addProcessor($attachment); + $pipeline->create(); + + $index = $this->_createIndex(); + $type = $index->getType('bulk_test'); + + $bulk = new Bulk($index->getClient()); + $bulk->setIndex($index); + $bulk->setType($type); + + $doc1 = new Document(null); + $doc1->addFileContent('data', file_get_contents(BASE_PATH.'/data/test.pdf')); + $doc1->set('text', 'basel world'); + + $doc2 = new Document(2, ['data' => '', 'text' => 'test running in basel']); + $doc2->set('text', 'running in basel'); + + $bulk->addDocuments([ + $doc1, $doc2 + ]); + $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment'); + + $bulk->send(); + $index->forcemerge(); + + $resultSet = $type->search('xodoa'); + $this->assertEquals(1, $resultSet->count()); + + $resultSet = $type->search('basel'); + $this->assertEquals(2, $resultSet->count()); + + // Author is ruflin + $resultSet = $type->search('ruflin'); + $this->assertEquals(1, $resultSet->count()); + + // String does not exist in file + $resultSet = 
$type->search('guschti'); + $this->assertEquals(0, $resultSet->count()); + } + + /** + * @group functional + */ + public function testAddWordxFile() + { + $attachment = new Attachment('data'); + $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment'); + $pipeline->addProcessor($attachment); + $pipeline->create(); + + $index = $this->_createIndex(); + $type = $index->getType('bulk_test'); + + $bulk = new Bulk($index->getClient()); + $bulk->setIndex($index); + $bulk->setType($type); + + $doc1 = new Document(null); + $doc1->addFile('data', BASE_PATH.'/data/test.docx'); + $doc1->set('text', 'basel world'); + + $doc2 = new Document(2, ['data' => '', 'text' => 'test running in basel']); + + $bulk->addDocuments([ + $doc1, $doc2 + ]); + $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment'); + + $bulk->send(); + $index->refresh(); + + $resultSet = $type->search('basel'); + $this->assertEquals(2, $resultSet->count()); + + $resultSet = $type->search('ruflin'); + $this->assertEquals(0, $resultSet->count()); + + $resultSet = $type->search('Xodoa'); + $this->assertEquals(1, $resultSet->count()); + + // String does not exist in file + $resultSet = $type->search('guschti'); + $this->assertEquals(0, $resultSet->count()); + } + + /** + * @group functional + */ + public function testExcludeFileSource() + { + $attachment = new Attachment('data'); + $pipeline = $this->_createPipeline('my_custom_pipeline_attachment', 'pipeline for Attachment'); + $pipeline->addProcessor($attachment); + $pipeline->create(); + + $indexMapping = ['data' => ['type' => 'text'], 'text' => ['type' => 'text', 'store' => true], + 'title' => ['type' => 'text', 'store' => true], ]; + + $indexParams = ['index' => ['number_of_shards' => 1, 'number_of_replicas' => 0]]; + + $index = $this->_createIndex(); + $type = new Type($index, 'content'); + + $mapping = Type\Mapping::create($indexMapping); + $mapping->setSource(['excludes' => ['data']]); + + 
$mapping->setType($type); + + $index->create($indexParams, true); + $type->setMapping($mapping); + + $docId = 1; + $text = 'Basel World'; + $title = 'No Title'; + + $doc1 = new Document($docId); + $doc1->set('text', $text); + $doc1->set('title', $title); + $doc1->addFile('data', BASE_PATH.'/data/test.docx'); + + $bulk = new Bulk($index->getClient()); + $bulk->setIndex($index); + $bulk->setType($type); + + $bulk->addDocuments([ + $doc1 + ]); + $bulk->setRequestParam('pipeline', 'my_custom_pipeline_attachment'); + + $bulk->send(); + // Optimization necessary, as otherwise source still in realtime get + $index->forcemerge(); + + $data = $type->getDocument($docId)->getData(); + $this->assertEquals($title, $data['title']); + $this->assertEquals($text, $data['text']); + $this->assertArrayNotHasKey('data', $data); + } } \ No newline at end of file